mirror of
https://github.com/apache/druid.git
synced 2025-02-16 23:15:16 +00:00
More optimize CNF conversion of filters (#9634)
* More optimize CNF conversion of filters * update license * fix build * checkstyle * remove unnecessary code * split helper * license * checkstyle * add comments on cnf conversion
This commit is contained in:
parent
08851c0198
commit
a6790ff22a
3
LICENSE
3
LICENSE
@ -215,7 +215,7 @@ Apache License version 2.0
|
||||
SOURCE/JAVA-CORE
|
||||
This product contains conjunctive normal form conversion code, a variance aggregator algorithm, and Bloom filter
|
||||
adapted from Apache Hive.
|
||||
* processing/src/main/java/org/apache/druid/segment/filter/Filters.java
|
||||
* processing/src/main/java/org/apache/druid/segment/filter/cnf/HiveCnfHelper.java
|
||||
* extensions-core/stats/src/main/java/io/druid/query/aggregation/variance/VarianceAggregatorCollector.java
|
||||
* extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomKFilter.java
|
||||
|
||||
@ -224,6 +224,7 @@ SOURCE/JAVA-CORE
|
||||
|
||||
This product contains SQL query planning code adapted from Apache Calcite.
|
||||
* sql/src/main/java/org/apache/druid/sql/calcite/
|
||||
* processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java
|
||||
|
||||
This product contains Kerberos authentication code adapted from Apache Hadoop.
|
||||
* extensions-core/druid-kerberos/src/main/java/org/apache/druid/security/kerberos/
|
||||
|
@ -377,7 +377,7 @@ public class FilterPartitionBenchmark
|
||||
Filter orFilter = new OrFilter(Arrays.asList(filter, filter2));
|
||||
|
||||
StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
|
||||
Sequence<Cursor> cursors = makeCursors(sa, Filters.toCNF(orFilter));
|
||||
Sequence<Cursor> cursors = makeCursors(sa, Filters.toCnf(orFilter));
|
||||
readCursors(cursors, blackhole);
|
||||
}
|
||||
|
||||
@ -451,7 +451,7 @@ public class FilterPartitionBenchmark
|
||||
);
|
||||
|
||||
StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
|
||||
Sequence<Cursor> cursors = makeCursors(sa, Filters.toCNF(dimFilter3.toFilter()));
|
||||
Sequence<Cursor> cursors = makeCursors(sa, Filters.toCnf(dimFilter3.toFilter()));
|
||||
readCursors(cursors, blackhole);
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.segment.filter;
|
||||
|
||||
import org.apache.druid.query.BitmapResultFactory;
|
||||
import org.apache.druid.query.filter.BitmapIndexSelector;
|
||||
import org.apache.druid.query.filter.Filter;
|
||||
import org.apache.druid.query.filter.ValueMatcher;
|
||||
import org.apache.druid.segment.ColumnSelector;
|
||||
import org.apache.druid.segment.ColumnSelectorFactory;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
|
||||
public class FalseFilter implements Filter
|
||||
{
|
||||
private static final FalseFilter INSTANCE = new FalseFilter();
|
||||
|
||||
public static FalseFilter instance()
|
||||
{
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
private FalseFilter()
|
||||
{
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory<T> bitmapResultFactory)
|
||||
{
|
||||
return bitmapResultFactory.wrapAllFalse(Filters.allFalse(selector));
|
||||
}
|
||||
|
||||
@Override
|
||||
public double estimateSelectivity(BitmapIndexSelector indexSelector)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueMatcher makeMatcher(ColumnSelectorFactory factory)
|
||||
{
|
||||
return FalseValueMatcher.instance();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean supportsBitmapIndex(BitmapIndexSelector selector)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean shouldUseBitmapIndex(BitmapIndexSelector selector)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getRequiredColumns()
|
||||
{
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "false";
|
||||
}
|
||||
}
|
@ -19,7 +19,6 @@
|
||||
|
||||
package org.apache.druid.segment.filter;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
@ -30,7 +29,6 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
import org.apache.druid.query.BitmapResultFactory;
|
||||
import org.apache.druid.query.Query;
|
||||
import org.apache.druid.query.filter.BitmapIndexSelector;
|
||||
import org.apache.druid.query.filter.BooleanFilter;
|
||||
import org.apache.druid.query.filter.DimFilter;
|
||||
import org.apache.druid.query.filter.DruidPredicateFactory;
|
||||
import org.apache.druid.query.filter.Filter;
|
||||
@ -44,12 +42,12 @@ import org.apache.druid.segment.column.BitmapIndex;
|
||||
import org.apache.druid.segment.column.ColumnHolder;
|
||||
import org.apache.druid.segment.data.CloseableIndexed;
|
||||
import org.apache.druid.segment.data.Indexed;
|
||||
import org.apache.druid.segment.filter.cnf.CalciteCnfHelper;
|
||||
import org.apache.druid.segment.filter.cnf.HiveCnfHelper;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
@ -426,175 +424,29 @@ public class Filters
|
||||
return null;
|
||||
}
|
||||
boolean useCNF = query.getContextBoolean(CTX_KEY_USE_FILTER_CNF, false);
|
||||
return useCNF ? toCNF(filter) : filter;
|
||||
return useCNF ? Filters.toCnf(filter) : filter;
|
||||
}
|
||||
|
||||
public static Filter toCNF(Filter current)
|
||||
public static Filter toCnf(Filter current)
|
||||
{
|
||||
current = pushDownNot(current);
|
||||
current = flatten(current);
|
||||
current = convertToCNFInternal(current);
|
||||
current = flatten(current);
|
||||
// Push down NOT filters to leaves if possible to remove NOT on NOT filters and reduce hierarchy.
|
||||
// ex) ~(a OR ~b) => ~a AND b
|
||||
current = HiveCnfHelper.pushDownNot(current);
|
||||
// Flatten nested AND and OR filters if possible.
|
||||
// ex) a AND (b AND c) => a AND b AND c
|
||||
current = HiveCnfHelper.flatten(current);
|
||||
// Pull up AND filters first to convert the filter into a conjunctive form.
|
||||
// It is important to pull before CNF conversion to not create a huge CNF.
|
||||
// ex) (a AND b) OR (a AND c AND d) => a AND (b OR (c AND d))
|
||||
current = CalciteCnfHelper.pull(current);
|
||||
// Convert filter to CNF.
|
||||
// a AND (b OR (c AND d)) => a AND (b OR c) AND (b OR d)
|
||||
current = HiveCnfHelper.convertToCnf(current);
|
||||
// Flatten again to remove any flattenable nested AND or OR filters created during CNF conversion.
|
||||
current = HiveCnfHelper.flatten(current);
|
||||
return current;
|
||||
}
|
||||
|
||||
// CNF conversion functions were adapted from Apache Hive, see:
|
||||
// https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
|
||||
@VisibleForTesting
|
||||
static Filter pushDownNot(Filter current)
|
||||
{
|
||||
if (current instanceof NotFilter) {
|
||||
Filter child = ((NotFilter) current).getBaseFilter();
|
||||
if (child instanceof NotFilter) {
|
||||
return pushDownNot(((NotFilter) child).getBaseFilter());
|
||||
}
|
||||
if (child instanceof AndFilter) {
|
||||
Set<Filter> children = new HashSet<>();
|
||||
for (Filter grandChild : ((AndFilter) child).getFilters()) {
|
||||
children.add(pushDownNot(new NotFilter(grandChild)));
|
||||
}
|
||||
return new OrFilter(children);
|
||||
}
|
||||
if (child instanceof OrFilter) {
|
||||
Set<Filter> children = new HashSet<>();
|
||||
for (Filter grandChild : ((OrFilter) child).getFilters()) {
|
||||
children.add(pushDownNot(new NotFilter(grandChild)));
|
||||
}
|
||||
return new AndFilter(children);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (current instanceof AndFilter) {
|
||||
Set<Filter> children = new HashSet<>();
|
||||
for (Filter child : ((AndFilter) current).getFilters()) {
|
||||
children.add(pushDownNot(child));
|
||||
}
|
||||
return new AndFilter(children);
|
||||
}
|
||||
|
||||
|
||||
if (current instanceof OrFilter) {
|
||||
Set<Filter> children = new HashSet<>();
|
||||
for (Filter child : ((OrFilter) current).getFilters()) {
|
||||
children.add(pushDownNot(child));
|
||||
}
|
||||
return new OrFilter(children);
|
||||
}
|
||||
return current;
|
||||
}
|
||||
|
||||
// CNF conversion functions were adapted from Apache Hive, see:
|
||||
// https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
|
||||
private static Filter convertToCNFInternal(Filter current)
|
||||
{
|
||||
if (current instanceof NotFilter) {
|
||||
return new NotFilter(convertToCNFInternal(((NotFilter) current).getBaseFilter()));
|
||||
}
|
||||
if (current instanceof AndFilter) {
|
||||
Set<Filter> children = new HashSet<>();
|
||||
for (Filter child : ((AndFilter) current).getFilters()) {
|
||||
children.add(convertToCNFInternal(child));
|
||||
}
|
||||
return new AndFilter(children);
|
||||
}
|
||||
if (current instanceof OrFilter) {
|
||||
// a list of leaves that weren't under AND expressions
|
||||
List<Filter> nonAndList = new ArrayList<Filter>();
|
||||
// a list of AND expressions that we need to distribute
|
||||
List<Filter> andList = new ArrayList<Filter>();
|
||||
for (Filter child : ((OrFilter) current).getFilters()) {
|
||||
if (child instanceof AndFilter) {
|
||||
andList.add(child);
|
||||
} else if (child instanceof OrFilter) {
|
||||
// pull apart the kids of the OR expression
|
||||
nonAndList.addAll(((OrFilter) child).getFilters());
|
||||
} else {
|
||||
nonAndList.add(child);
|
||||
}
|
||||
}
|
||||
if (!andList.isEmpty()) {
|
||||
Set<Filter> result = new HashSet<>();
|
||||
generateAllCombinations(result, andList, nonAndList);
|
||||
return new AndFilter(result);
|
||||
}
|
||||
}
|
||||
return current;
|
||||
}
|
||||
|
||||
// CNF conversion functions were adapted from Apache Hive, see:
|
||||
// https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
|
||||
@VisibleForTesting
|
||||
static Filter flatten(Filter root)
|
||||
{
|
||||
if (root instanceof BooleanFilter) {
|
||||
List<Filter> children = new ArrayList<>(((BooleanFilter) root).getFilters());
|
||||
// iterate through the index, so that if we add more children,
|
||||
// they don't get re-visited
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
Filter child = flatten(children.get(i));
|
||||
// do we need to flatten?
|
||||
if (child.getClass() == root.getClass() && !(child instanceof NotFilter)) {
|
||||
boolean first = true;
|
||||
Set<Filter> grandKids = ((BooleanFilter) child).getFilters();
|
||||
for (Filter grandkid : grandKids) {
|
||||
// for the first grandkid replace the original parent
|
||||
if (first) {
|
||||
first = false;
|
||||
children.set(i, grandkid);
|
||||
} else {
|
||||
children.add(++i, grandkid);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
children.set(i, child);
|
||||
}
|
||||
}
|
||||
// if we have a singleton AND or OR, just return the child
|
||||
if (children.size() == 1 && (root instanceof AndFilter || root instanceof OrFilter)) {
|
||||
return children.get(0);
|
||||
}
|
||||
|
||||
if (root instanceof AndFilter) {
|
||||
return new AndFilter(children);
|
||||
} else if (root instanceof OrFilter) {
|
||||
return new OrFilter(children);
|
||||
}
|
||||
}
|
||||
return root;
|
||||
}
|
||||
|
||||
// CNF conversion functions were adapted from Apache Hive, see:
|
||||
// https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
|
||||
private static void generateAllCombinations(
|
||||
Set<Filter> result,
|
||||
List<Filter> andList,
|
||||
List<Filter> nonAndList
|
||||
)
|
||||
{
|
||||
Set<Filter> children = ((AndFilter) andList.get(0)).getFilters();
|
||||
if (result.isEmpty()) {
|
||||
for (Filter child : children) {
|
||||
Set<Filter> a = new HashSet<>(nonAndList);
|
||||
a.add(child);
|
||||
result.add(new OrFilter(a));
|
||||
}
|
||||
} else {
|
||||
List<Filter> work = new ArrayList<>(result);
|
||||
result.clear();
|
||||
for (Filter child : children) {
|
||||
for (Filter or : work) {
|
||||
Set<Filter> a = new HashSet<>((((OrFilter) or).getFilters()));
|
||||
a.add(child);
|
||||
result.add(new OrFilter(a));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (andList.size() > 1) {
|
||||
generateAllCombinations(result, andList.subList(1, andList.size()), nonAndList);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method provides a "standard" implementation of {@link Filter#shouldUseBitmapIndex(BitmapIndexSelector)} which takes
|
||||
* a {@link Filter}, a {@link BitmapIndexSelector}, and {@link FilterTuning} to determine if:
|
||||
|
@ -0,0 +1,301 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.segment.filter.cnf;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Iterables;
|
||||
import org.apache.druid.query.filter.Filter;
|
||||
import org.apache.druid.segment.filter.AndFilter;
|
||||
import org.apache.druid.segment.filter.FalseFilter;
|
||||
import org.apache.druid.segment.filter.OrFilter;
|
||||
import org.apache.druid.segment.filter.TrueFilter;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* All functions in this class were basically adopted from Apache Calcite and modified to use them in Druid.
|
||||
* See https://github.com/apache/calcite/blob/branch-1.21/core/src/main/java/org/apache/calcite/rex/RexUtil.java#L1615
|
||||
* for original implementation.
|
||||
*/
|
||||
public class CalciteCnfHelper
|
||||
{
|
||||
public static Filter pull(Filter rex)
|
||||
{
|
||||
final Set<Filter> operands;
|
||||
if (rex instanceof AndFilter) {
|
||||
operands = ((AndFilter) rex).getFilters();
|
||||
return and(pullList(operands));
|
||||
} else if (rex instanceof OrFilter) {
|
||||
operands = ((OrFilter) rex).getFilters();
|
||||
final Map<Filter, Filter> factors = commonFactors(operands);
|
||||
if (factors.isEmpty()) {
|
||||
return or(operands);
|
||||
}
|
||||
final List<Filter> list = new ArrayList<>();
|
||||
for (Filter operand : operands) {
|
||||
Filter removed = removeFactor(factors, operand);
|
||||
if (removed != null) {
|
||||
list.add(removed);
|
||||
}
|
||||
}
|
||||
if (list.isEmpty()) {
|
||||
return and(factors.values());
|
||||
} else if (list.size() == 1) {
|
||||
return and(Iterables.concat(factors.values(), ImmutableList.of(list.get(0))));
|
||||
} else {
|
||||
return and(Iterables.concat(factors.values(), ImmutableList.of(or(list))));
|
||||
}
|
||||
} else {
|
||||
return rex;
|
||||
}
|
||||
}
|
||||
|
||||
private static List<Filter> pullList(Set<Filter> nodes)
|
||||
{
|
||||
final List<Filter> list = new ArrayList<>();
|
||||
for (Filter node : nodes) {
|
||||
Filter pulled = pull(node);
|
||||
if (pulled instanceof AndFilter) {
|
||||
list.addAll(((AndFilter) pulled).getFilters());
|
||||
} else {
|
||||
list.add(pulled);
|
||||
}
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
private static Map<Filter, Filter> commonFactors(Set<Filter> nodes)
|
||||
{
|
||||
final Map<Filter, Filter> map = new HashMap<>();
|
||||
int i = 0;
|
||||
for (Filter node : nodes) {
|
||||
if (i++ == 0) {
|
||||
for (Filter conjunction : conjunctions(node)) {
|
||||
map.put(conjunction, conjunction);
|
||||
}
|
||||
} else {
|
||||
map.keySet().retainAll(conjunctions(node));
|
||||
}
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
private static Filter removeFactor(Map<Filter, Filter> factors, Filter node)
|
||||
{
|
||||
List<Filter> list = new ArrayList<>();
|
||||
for (Filter operand : conjunctions(node)) {
|
||||
if (!factors.containsKey(operand)) {
|
||||
list.add(operand);
|
||||
}
|
||||
}
|
||||
return and(list);
|
||||
}
|
||||
|
||||
private static Filter and(Iterable<? extends Filter> nodes)
|
||||
{
|
||||
return composeConjunction(nodes);
|
||||
}
|
||||
|
||||
private static Filter or(Iterable<? extends Filter> nodes)
|
||||
{
|
||||
return composeDisjunction(nodes);
|
||||
}
|
||||
|
||||
/** As {@link #composeConjunction(Iterable, boolean)} but never
|
||||
* returns null. */
|
||||
public static @Nonnull Filter composeConjunction(Iterable<? extends Filter> nodes)
|
||||
{
|
||||
final Filter e = composeConjunction(nodes, false);
|
||||
return Objects.requireNonNull(e);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a collection of expressions into an AND.
|
||||
* If there are zero expressions, returns TRUE.
|
||||
* If there is one expression, returns just that expression.
|
||||
* If any of the expressions are FALSE, returns FALSE.
|
||||
* Removes expressions that always evaluate to TRUE.
|
||||
* Returns null only if {@code nullOnEmpty} and expression is TRUE.
|
||||
*/
|
||||
public static Filter composeConjunction(Iterable<? extends Filter> nodes, boolean nullOnEmpty)
|
||||
{
|
||||
ImmutableList<Filter> list = flattenAnd(nodes);
|
||||
switch (list.size()) {
|
||||
case 0:
|
||||
return nullOnEmpty
|
||||
? null
|
||||
: TrueFilter.instance();
|
||||
case 1:
|
||||
return list.get(0);
|
||||
default:
|
||||
return new AndFilter(list);
|
||||
}
|
||||
}
|
||||
|
||||
/** Flattens a list of AND nodes.
|
||||
*
|
||||
* <p>Treats null nodes as literal TRUE (i.e. ignores them). */
|
||||
public static ImmutableList<Filter> flattenAnd(Iterable<? extends Filter> nodes)
|
||||
{
|
||||
if (nodes instanceof Collection && ((Collection) nodes).isEmpty()) {
|
||||
// Optimize common case
|
||||
return ImmutableList.of();
|
||||
}
|
||||
final ImmutableList.Builder<Filter> builder = ImmutableList.builder();
|
||||
final Set<Filter> set = new HashSet<>(); // to eliminate duplicates
|
||||
for (Filter node : nodes) {
|
||||
if (node != null) {
|
||||
addAnd(builder, set, node);
|
||||
}
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
private static void addAnd(ImmutableList.Builder<Filter> builder, Set<Filter> digests, Filter node)
|
||||
{
|
||||
if (node instanceof AndFilter) {
|
||||
for (Filter operand : ((AndFilter) node).getFilters()) {
|
||||
addAnd(builder, digests, operand);
|
||||
}
|
||||
} else {
|
||||
if (!(node instanceof TrueFilter) && digests.add(node)) {
|
||||
builder.add(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a collection of expressions into an OR.
|
||||
* If there are zero expressions, returns FALSE.
|
||||
* If there is one expression, returns just that expression.
|
||||
* If any of the expressions are TRUE, returns TRUE.
|
||||
* Removes expressions that always evaluate to FALSE.
|
||||
* Flattens expressions that are ORs.
|
||||
*/
|
||||
@Nonnull public static Filter composeDisjunction(Iterable<? extends Filter> nodes)
|
||||
{
|
||||
final Filter e = composeDisjunction(nodes, false);
|
||||
return Objects.requireNonNull(e);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a collection of expressions into an OR,
|
||||
* optionally returning null if the list is empty.
|
||||
*/
|
||||
public static Filter composeDisjunction(Iterable<? extends Filter> nodes, boolean nullOnEmpty)
|
||||
{
|
||||
ImmutableList<Filter> list = flattenOr(nodes);
|
||||
switch (list.size()) {
|
||||
case 0:
|
||||
return nullOnEmpty ? null : FalseFilter.instance();
|
||||
case 1:
|
||||
return list.get(0);
|
||||
default:
|
||||
if (containsTrue(list)) {
|
||||
return TrueFilter.instance();
|
||||
}
|
||||
return new OrFilter(list);
|
||||
}
|
||||
}
|
||||
|
||||
/** Flattens a list of OR nodes. */
|
||||
public static ImmutableList<Filter> flattenOr(Iterable<? extends Filter> nodes)
|
||||
{
|
||||
if (nodes instanceof Collection && ((Collection) nodes).isEmpty()) {
|
||||
// Optimize common case
|
||||
return ImmutableList.of();
|
||||
}
|
||||
final ImmutableList.Builder<Filter> builder = ImmutableList.builder();
|
||||
final Set<Filter> set = new HashSet<>(); // to eliminate duplicates
|
||||
for (Filter node : nodes) {
|
||||
addOr(builder, set, node);
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
private static void addOr(ImmutableList.Builder<Filter> builder, Set<Filter> set, Filter node)
|
||||
{
|
||||
if (node instanceof OrFilter) {
|
||||
for (Filter operand : ((OrFilter) node).getFilters()) {
|
||||
addOr(builder, set, operand);
|
||||
}
|
||||
} else {
|
||||
if (set.add(node)) {
|
||||
builder.add(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a condition decomposed by AND.
|
||||
*
|
||||
* <p>For example, {@code conjunctions(TRUE)} returns the empty list;
|
||||
* {@code conjunctions(FALSE)} returns list {@code {FALSE}}.</p>
|
||||
*/
|
||||
public static List<Filter> conjunctions(Filter rexPredicate)
|
||||
{
|
||||
final List<Filter> list = new ArrayList<>();
|
||||
decomposeConjunction(rexPredicate, list);
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decomposes a predicate into a list of expressions that are AND'ed
|
||||
* together.
|
||||
*
|
||||
* @param rexPredicate predicate to be analyzed
|
||||
* @param rexList list of decomposed RexNodes
|
||||
*/
|
||||
public static void decomposeConjunction(Filter rexPredicate, List<Filter> rexList)
|
||||
{
|
||||
if (rexPredicate == null || rexPredicate instanceof TrueFilter) {
|
||||
return;
|
||||
}
|
||||
if (rexPredicate instanceof AndFilter) {
|
||||
for (Filter operand : ((AndFilter) rexPredicate).getFilters()) {
|
||||
decomposeConjunction(operand, rexList);
|
||||
}
|
||||
} else {
|
||||
rexList.add(rexPredicate);
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean containsTrue(Iterable<Filter> nodes)
|
||||
{
|
||||
for (Filter node : nodes) {
|
||||
if (node instanceof TrueFilter) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private CalciteCnfHelper()
|
||||
{
|
||||
}
|
||||
}
|
@ -0,0 +1,190 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.segment.filter.cnf;
|
||||
|
||||
import org.apache.druid.query.filter.BooleanFilter;
|
||||
import org.apache.druid.query.filter.Filter;
|
||||
import org.apache.druid.segment.filter.AndFilter;
|
||||
import org.apache.druid.segment.filter.NotFilter;
|
||||
import org.apache.druid.segment.filter.OrFilter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* All functions in this class were basically adopted from Apache Hive and modified to use them in Druid.
|
||||
* See https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
|
||||
* for original implementation.
|
||||
*/
|
||||
public class HiveCnfHelper
|
||||
{
|
||||
public static Filter pushDownNot(Filter current)
|
||||
{
|
||||
if (current instanceof NotFilter) {
|
||||
Filter child = ((NotFilter) current).getBaseFilter();
|
||||
if (child instanceof NotFilter) {
|
||||
return pushDownNot(((NotFilter) child).getBaseFilter());
|
||||
}
|
||||
if (child instanceof AndFilter) {
|
||||
Set<Filter> children = new HashSet<>();
|
||||
for (Filter grandChild : ((AndFilter) child).getFilters()) {
|
||||
children.add(pushDownNot(new NotFilter(grandChild)));
|
||||
}
|
||||
return new OrFilter(children);
|
||||
}
|
||||
if (child instanceof OrFilter) {
|
||||
Set<Filter> children = new HashSet<>();
|
||||
for (Filter grandChild : ((OrFilter) child).getFilters()) {
|
||||
children.add(pushDownNot(new NotFilter(grandChild)));
|
||||
}
|
||||
return new AndFilter(children);
|
||||
}
|
||||
}
|
||||
|
||||
if (current instanceof AndFilter) {
|
||||
Set<Filter> children = new HashSet<>();
|
||||
for (Filter child : ((AndFilter) current).getFilters()) {
|
||||
children.add(pushDownNot(child));
|
||||
}
|
||||
return new AndFilter(children);
|
||||
}
|
||||
|
||||
if (current instanceof OrFilter) {
|
||||
Set<Filter> children = new HashSet<>();
|
||||
for (Filter child : ((OrFilter) current).getFilters()) {
|
||||
children.add(pushDownNot(child));
|
||||
}
|
||||
return new OrFilter(children);
|
||||
}
|
||||
return current;
|
||||
}
|
||||
|
||||
public static Filter convertToCnf(Filter current)
|
||||
{
|
||||
if (current instanceof NotFilter) {
|
||||
return new NotFilter(convertToCnf(((NotFilter) current).getBaseFilter()));
|
||||
}
|
||||
if (current instanceof AndFilter) {
|
||||
Set<Filter> children = new HashSet<>();
|
||||
for (Filter child : ((AndFilter) current).getFilters()) {
|
||||
children.add(convertToCnf(child));
|
||||
}
|
||||
return new AndFilter(children);
|
||||
}
|
||||
if (current instanceof OrFilter) {
|
||||
// a list of leaves that weren't under AND expressions
|
||||
List<Filter> nonAndList = new ArrayList<Filter>();
|
||||
// a list of AND expressions that we need to distribute
|
||||
List<Filter> andList = new ArrayList<Filter>();
|
||||
for (Filter child : ((OrFilter) current).getFilters()) {
|
||||
if (child instanceof AndFilter) {
|
||||
andList.add(child);
|
||||
} else if (child instanceof OrFilter) {
|
||||
// pull apart the kids of the OR expression
|
||||
nonAndList.addAll(((OrFilter) child).getFilters());
|
||||
} else {
|
||||
nonAndList.add(child);
|
||||
}
|
||||
}
|
||||
if (!andList.isEmpty()) {
|
||||
Set<Filter> result = new HashSet<>();
|
||||
generateAllCombinations(result, andList, nonAndList);
|
||||
return new AndFilter(result);
|
||||
}
|
||||
}
|
||||
return current;
|
||||
}
|
||||
|
||||
public static Filter flatten(Filter root)
|
||||
{
|
||||
if (root instanceof BooleanFilter) {
|
||||
List<Filter> children = new ArrayList<>(((BooleanFilter) root).getFilters());
|
||||
// iterate through the index, so that if we add more children,
|
||||
// they don't get re-visited
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
Filter child = flatten(children.get(i));
|
||||
// do we need to flatten?
|
||||
if (child.getClass() == root.getClass() && !(child instanceof NotFilter)) {
|
||||
boolean first = true;
|
||||
Set<Filter> grandKids = ((BooleanFilter) child).getFilters();
|
||||
for (Filter grandkid : grandKids) {
|
||||
// for the first grandkid replace the original parent
|
||||
if (first) {
|
||||
first = false;
|
||||
children.set(i, grandkid);
|
||||
} else {
|
||||
children.add(++i, grandkid);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
children.set(i, child);
|
||||
}
|
||||
}
|
||||
// if we have a singleton AND or OR, just return the child
|
||||
if (children.size() == 1 && (root instanceof AndFilter || root instanceof OrFilter)) {
|
||||
return children.get(0);
|
||||
}
|
||||
|
||||
if (root instanceof AndFilter) {
|
||||
return new AndFilter(children);
|
||||
} else if (root instanceof OrFilter) {
|
||||
return new OrFilter(children);
|
||||
}
|
||||
}
|
||||
return root;
|
||||
}
|
||||
|
||||
// A helper function adapted from Apache Hive, see:
|
||||
// https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
|
||||
private static void generateAllCombinations(
|
||||
Set<Filter> result,
|
||||
List<Filter> andList,
|
||||
List<Filter> nonAndList
|
||||
)
|
||||
{
|
||||
Set<Filter> children = ((AndFilter) andList.get(0)).getFilters();
|
||||
if (result.isEmpty()) {
|
||||
for (Filter child : children) {
|
||||
Set<Filter> a = new HashSet<>(nonAndList);
|
||||
a.add(child);
|
||||
result.add(new OrFilter(a));
|
||||
}
|
||||
} else {
|
||||
List<Filter> work = new ArrayList<>(result);
|
||||
result.clear();
|
||||
for (Filter child : children) {
|
||||
for (Filter or : work) {
|
||||
Set<Filter> a = new HashSet<>((((OrFilter) or).getFilters()));
|
||||
a.add(child);
|
||||
result.add(new OrFilter(a));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (andList.size() > 1) {
|
||||
generateAllCombinations(result, andList.subList(1, andList.size()), nonAndList);
|
||||
}
|
||||
}
|
||||
|
||||
private HiveCnfHelper()
|
||||
{
|
||||
}
|
||||
}
|
@ -128,7 +128,7 @@ public class JoinFilterAnalyzer
|
||||
);
|
||||
}
|
||||
|
||||
Filter normalizedFilter = Filters.toCNF(originalFilter);
|
||||
Filter normalizedFilter = Filters.toCnf(originalFilter);
|
||||
|
||||
// List of candidates for pushdown
|
||||
// CNF normalization will generate either
|
||||
|
@ -336,7 +336,7 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest
|
||||
|
||||
final DimFilter maybeOptimized = optimize ? dimFilter.optimize() : dimFilter;
|
||||
final Filter filter = maybeOptimized.toFilter();
|
||||
return cnf ? Filters.toCNF(filter) : filter;
|
||||
return cnf ? Filters.toCnf(filter) : filter;
|
||||
}
|
||||
|
||||
private DimFilter maybeOptimize(final DimFilter dimFilter)
|
||||
|
@ -0,0 +1,490 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.segment.filter;
|
||||
|
||||
import org.apache.druid.java.util.common.ISE;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.query.dimension.DimensionSpec;
|
||||
import org.apache.druid.query.filter.Filter;
|
||||
import org.apache.druid.segment.ColumnSelectorFactory;
|
||||
import org.apache.druid.segment.ColumnValueSelector;
|
||||
import org.apache.druid.segment.DimensionSelector;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.filter.cnf.CalciteCnfHelper;
|
||||
import org.apache.druid.segment.filter.cnf.HiveCnfHelper;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
|
||||
public class FilterCnfConversionTest
|
||||
{
|
||||
@Test
|
||||
public void testPushDownNot()
|
||||
{
|
||||
final Filter filter = FilterTestUtils.not(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "1"),
|
||||
FilterTestUtils.selector("col2", "2"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col3", "3"))
|
||||
)
|
||||
);
|
||||
final Filter expected = FilterTestUtils.or(
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col1", "1")),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col2", "2")),
|
||||
FilterTestUtils.selector("col3", "3")
|
||||
);
|
||||
final Filter pushedDown = HiveCnfHelper.pushDownNot(filter);
|
||||
assertFilter(filter, expected, pushedDown);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPushDownNotLeafNot()
|
||||
{
|
||||
final Filter filter = FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "1"),
|
||||
FilterTestUtils.selector("col2", "2"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col3", "3"))
|
||||
);
|
||||
final Filter pushedDown = HiveCnfHelper.pushDownNot(filter);
|
||||
assertFilter(filter, filter, pushedDown);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFlatten()
|
||||
{
|
||||
final Filter filter = FilterTestUtils.and(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "1"),
|
||||
FilterTestUtils.selector("col2", "2")
|
||||
)
|
||||
),
|
||||
FilterTestUtils.selector("col3", "3")
|
||||
);
|
||||
final Filter expected = FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "1"),
|
||||
FilterTestUtils.selector("col2", "2"),
|
||||
FilterTestUtils.selector("col3", "3")
|
||||
);
|
||||
final Filter flattened = HiveCnfHelper.flatten(filter);
|
||||
assertFilter(filter, expected, flattened);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFlattenUnflattenable()
|
||||
{
|
||||
final Filter filter = FilterTestUtils.and(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "1"),
|
||||
FilterTestUtils.selector("col2", "2")
|
||||
),
|
||||
FilterTestUtils.selector("col3", "3")
|
||||
);
|
||||
final Filter flattened = HiveCnfHelper.flatten(filter);
|
||||
assertFilter(filter, filter, flattened);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToCnfWithMuchReducibleFilter()
|
||||
{
|
||||
final Filter muchReducible = FilterTestUtils.and(
|
||||
// should be flattened
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.and(FilterTestUtils.selector("col1", "val1"))
|
||||
)
|
||||
),
|
||||
// should be flattened
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.and(FilterTestUtils.selector("col1", "val1"))
|
||||
)
|
||||
),
|
||||
// should be flattened
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.or(FilterTestUtils.selector("col1", "val1"))
|
||||
)
|
||||
),
|
||||
// should eliminate duplicate filters
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val2"),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val2")
|
||||
),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col2", "val2"),
|
||||
FilterTestUtils.selector("col1", "val1")
|
||||
)
|
||||
)
|
||||
);
|
||||
final Filter expected = FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val2")
|
||||
);
|
||||
final Filter cnf = Filters.toCnf(muchReducible);
|
||||
assertFilter(muchReducible, expected, cnf);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToCnfWithComplexFilterIncludingNotAndOr()
|
||||
{
|
||||
final Filter filter = FilterTestUtils.and(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val2")
|
||||
),
|
||||
FilterTestUtils.not(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col4", "val4"),
|
||||
FilterTestUtils.selector("col5", "val5")
|
||||
)
|
||||
)
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.not(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col2", "val2"),
|
||||
FilterTestUtils.selector("col4", "val4"),
|
||||
FilterTestUtils.selector("col5", "val5")
|
||||
)
|
||||
),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col3", "val3")
|
||||
)
|
||||
),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val22"), // selecting different value
|
||||
FilterTestUtils.selector("col3", "val3")
|
||||
),
|
||||
FilterTestUtils.not(
|
||||
FilterTestUtils.selector("col1", "val11")
|
||||
)
|
||||
),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val22"),
|
||||
FilterTestUtils.selector("col3", "val3")
|
||||
),
|
||||
FilterTestUtils.not(
|
||||
FilterTestUtils.selector("col1", "val11") // selecting different value
|
||||
)
|
||||
)
|
||||
);
|
||||
final Filter expected = FilterTestUtils.and(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val22"),
|
||||
FilterTestUtils.selector("col3", "val3")
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col2", "val2"))
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col2", "val2")),
|
||||
FilterTestUtils.selector("col3", "val3")
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4"))
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col3", "val3"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4"))
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col3", "val3"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
|
||||
),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col1", "val11")),
|
||||
// The below OR filter could be eliminated because this filter also has
|
||||
// (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)).
|
||||
// The reduction process would be
|
||||
// (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)) && (col1 = val1 || ~(col4 = val4) || ~(col5 = val5))
|
||||
// => (col1 = val1 && ~(col4 = val4) || ~(col5 = val5)) && (col1 = val1 || ~(col4 = val4) || ~(col5 = val5))
|
||||
// => (col1 = val1 && ~(col4 = val4) || ~(col5 = val5))
|
||||
// => (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)).
|
||||
// However, we don't have this reduction now, so we have a filter in a suboptimized CNF.
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4")),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col2", "val2"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4")),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
|
||||
)
|
||||
);
|
||||
final Filter cnf = Filters.toCnf(filter);
|
||||
assertFilter(filter, expected, cnf);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToCnfCollapsibleBigFilter()
|
||||
{
|
||||
Set<Filter> ands = new HashSet<>();
|
||||
Set<Filter> ors = new HashSet<>();
|
||||
for (int i = 0; i < 12; i++) {
|
||||
ands.add(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col3", "val3"),
|
||||
FilterTestUtils.selector("col4", "val4"),
|
||||
FilterTestUtils.selector("col5", StringUtils.format("val%d", i))
|
||||
)
|
||||
);
|
||||
ors.add(FilterTestUtils.selector("col5", StringUtils.format("val%d", i)));
|
||||
}
|
||||
|
||||
final Filter bigFilter = FilterTestUtils.and(
|
||||
new OrFilter(ands),
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val2")
|
||||
);
|
||||
final Filter expectedCnf = FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val2"),
|
||||
FilterTestUtils.selector("col3", "val3"),
|
||||
FilterTestUtils.selector("col4", "val4"),
|
||||
new OrFilter(ors)
|
||||
);
|
||||
final Filter cnf = Filters.toCnf(bigFilter);
|
||||
assertFilter(bigFilter, expectedCnf, cnf);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPullOrOnlyFilter()
|
||||
{
|
||||
final Filter filter = FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val2"),
|
||||
FilterTestUtils.selector("col3", "val3")
|
||||
);
|
||||
|
||||
assertFilter(filter, filter, CalciteCnfHelper.pull(filter));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPullNotPullableFilter()
|
||||
{
|
||||
final Filter filter = FilterTestUtils.or(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val2")
|
||||
),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col3", "val3"),
|
||||
FilterTestUtils.selector("col4", "val4")
|
||||
),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col5", "val5"),
|
||||
FilterTestUtils.selector("col6", "val6")
|
||||
),
|
||||
FilterTestUtils.selector("col7", "val7")
|
||||
);
|
||||
|
||||
assertFilter(filter, filter, CalciteCnfHelper.pull(filter));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToCnfFilterThatPullCannotConvertToCnfProperly()
|
||||
{
|
||||
final Filter filter = FilterTestUtils.or(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val2")
|
||||
),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col3", "val3"),
|
||||
FilterTestUtils.selector("col4", "val4")
|
||||
)
|
||||
);
|
||||
|
||||
final Filter expectedCnf = FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col2", "val2"),
|
||||
FilterTestUtils.selector("col3", "val3")
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col2", "val2"),
|
||||
FilterTestUtils.selector("col4", "val4")
|
||||
)
|
||||
);
|
||||
|
||||
assertFilter(filter, expectedCnf, Filters.toCnf(filter));
|
||||
}
|
||||
|
||||
private void assertFilter(Filter original, Filter expectedConverted, Filter actualConverted)
|
||||
{
|
||||
assertEquivalent(original, expectedConverted);
|
||||
Assert.assertEquals(expectedConverted, actualConverted);
|
||||
}
|
||||
|
||||
/**
|
||||
* Assert the given two filters are equivalent by comparing their truth table.
|
||||
*/
|
||||
private void assertEquivalent(Filter f1, Filter f2)
|
||||
{
|
||||
final Set<SelectorFilter> s1 = searchForSelectors(f1);
|
||||
final Set<SelectorFilter> s2 = searchForSelectors(f2);
|
||||
Assert.assertEquals(s1, s2);
|
||||
|
||||
// Compare truth table
|
||||
final List<SelectorFilter> selectorFilters = new ArrayList<>(s1);
|
||||
List<Map<SelectorFilter, Boolean>> truthValues = populate(selectorFilters, selectorFilters.size() - 1);
|
||||
for (Map<SelectorFilter, Boolean> truthValue : truthValues) {
|
||||
Assert.assertEquals(evaluateFilterWith(f1, truthValue), evaluateFilterWith(f2, truthValue));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively populate all permutations of truth values.
|
||||
*
|
||||
* @param selectorFilters selector filters
|
||||
* @param cursor the offset to the current selectFilter to pupulate truth values
|
||||
*
|
||||
* @return a list of truth values populated up to the cursor
|
||||
*/
|
||||
private List<Map<SelectorFilter, Boolean>> populate(List<SelectorFilter> selectorFilters, int cursor)
|
||||
{
|
||||
// populateFalse and populateTrue will include the false and the true value
|
||||
// for the current selectFilter, respectively.
|
||||
final List<Map<SelectorFilter, Boolean>> populateFalse;
|
||||
final List<Map<SelectorFilter, Boolean>> populateTrue;
|
||||
if (cursor == 0) {
|
||||
Map<SelectorFilter, Boolean> mapForFalse = new HashMap<>();
|
||||
Map<SelectorFilter, Boolean> mapForTrue = new HashMap<>();
|
||||
for (SelectorFilter eachFilter : selectorFilters) {
|
||||
mapForFalse.put(eachFilter, false);
|
||||
mapForTrue.put(eachFilter, false);
|
||||
}
|
||||
populateFalse = new ArrayList<>();
|
||||
populateFalse.add(mapForFalse);
|
||||
populateTrue = new ArrayList<>();
|
||||
populateTrue.add(mapForTrue);
|
||||
} else {
|
||||
final List<Map<SelectorFilter, Boolean>> populated = populate(selectorFilters, cursor - 1);
|
||||
populateFalse = new ArrayList<>(populated.size());
|
||||
populateTrue = new ArrayList<>(populated.size());
|
||||
for (Map<SelectorFilter, Boolean> eachMap : populated) {
|
||||
populateFalse.add(new HashMap<>(eachMap));
|
||||
populateTrue.add(new HashMap<>(eachMap));
|
||||
}
|
||||
}
|
||||
|
||||
for (Map<SelectorFilter, Boolean> eachMap : populateTrue) {
|
||||
eachMap.put(selectorFilters.get(cursor), true);
|
||||
}
|
||||
|
||||
final List<Map<SelectorFilter, Boolean>> allPopulated = new ArrayList<>(populateFalse);
|
||||
allPopulated.addAll(populateTrue);
|
||||
return allPopulated;
|
||||
}
|
||||
|
||||
private Set<SelectorFilter> searchForSelectors(Filter filter)
|
||||
{
|
||||
Set<SelectorFilter> found = new HashSet<>();
|
||||
visitSelectorFilters(filter, selectorFilter -> {
|
||||
found.add(selectorFilter);
|
||||
return selectorFilter;
|
||||
});
|
||||
return found;
|
||||
}
|
||||
|
||||
private boolean evaluateFilterWith(Filter filter, Map<SelectorFilter, Boolean> values)
|
||||
{
|
||||
Filter rewrittenFilter = visitSelectorFilters(filter, selectorFilter -> {
|
||||
Boolean truth = values.get(selectorFilter);
|
||||
if (truth == null) {
|
||||
throw new ISE("Can't find truth value for selectorFilter[%s]", selectorFilter);
|
||||
}
|
||||
return truth ? TrueFilter.instance() : FalseFilter.instance();
|
||||
});
|
||||
return rewrittenFilter.makeMatcher(
|
||||
new ColumnSelectorFactory()
|
||||
{
|
||||
@Override
|
||||
public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnValueSelector makeColumnValueSelector(String columnName)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public ColumnCapabilities getColumnCapabilities(String column)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
}
|
||||
).matches();
|
||||
}
|
||||
|
||||
private Filter visitSelectorFilters(Filter filter, Function<SelectorFilter, Filter> visitAction)
|
||||
{
|
||||
if (filter instanceof AndFilter) {
|
||||
Set<Filter> newChildren = new HashSet<>();
|
||||
for (Filter child : ((AndFilter) filter).getFilters()) {
|
||||
newChildren.add(visitSelectorFilters(child, visitAction));
|
||||
}
|
||||
return new AndFilter(newChildren);
|
||||
} else if (filter instanceof OrFilter) {
|
||||
Set<Filter> newChildren = new HashSet<>();
|
||||
for (Filter child : ((OrFilter) filter).getFilters()) {
|
||||
newChildren.add(visitSelectorFilters(child, visitAction));
|
||||
}
|
||||
return new OrFilter(newChildren);
|
||||
} else if (filter instanceof NotFilter) {
|
||||
Filter child = ((NotFilter) filter).getBaseFilter();
|
||||
return new NotFilter(visitSelectorFilters(child, visitAction));
|
||||
} else if (filter instanceof SelectorFilter) {
|
||||
return visitAction.apply((SelectorFilter) filter);
|
||||
}
|
||||
return filter;
|
||||
}
|
||||
}
|
@ -621,7 +621,7 @@ public class FilterPartitionTest extends BaseFilterTest
|
||||
);
|
||||
|
||||
Filter filter1 = dimFilter1.toFilter();
|
||||
Filter filter1CNF = Filters.toCNF(filter1);
|
||||
Filter filter1CNF = Filters.toCnf(filter1);
|
||||
|
||||
Assert.assertEquals(AndFilter.class, filter1CNF.getClass());
|
||||
Assert.assertEquals(2, ((AndFilter) filter1CNF).getFilters().size());
|
||||
@ -675,7 +675,7 @@ public class FilterPartitionTest extends BaseFilterTest
|
||||
);
|
||||
|
||||
Filter filter1 = dimFilter1.toFilter();
|
||||
Filter filter1CNF = Filters.toCNF(filter1);
|
||||
Filter filter1CNF = Filters.toCnf(filter1);
|
||||
|
||||
Assert.assertEquals(AndFilter.class, filter1CNF.getClass());
|
||||
Assert.assertEquals(2, ((AndFilter) filter1CNF).getFilters().size());
|
||||
|
@ -25,7 +25,6 @@ import org.apache.druid.collections.bitmap.BitmapFactory;
|
||||
import org.apache.druid.collections.bitmap.ConciseBitmapFactory;
|
||||
import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
import org.apache.druid.collections.bitmap.MutableBitmap;
|
||||
import org.apache.druid.query.filter.Filter;
|
||||
import org.apache.druid.segment.IntIteratorUtils;
|
||||
import org.apache.druid.segment.column.BitmapIndex;
|
||||
import org.apache.druid.testing.InitializedNullHandlingTest;
|
||||
@ -52,215 +51,6 @@ public class FiltersTest extends InitializedNullHandlingTest
|
||||
Assert.assertEquals(expected, estimated, 0.00001);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPushDownNot()
|
||||
{
|
||||
final Filter filter = FilterTestUtils.not(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "1"),
|
||||
FilterTestUtils.selector("col2", "2"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col3", "3"))
|
||||
)
|
||||
);
|
||||
final Filter expected = FilterTestUtils.or(
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col1", "1")),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col2", "2")),
|
||||
FilterTestUtils.selector("col3", "3")
|
||||
);
|
||||
Assert.assertEquals(expected, Filters.pushDownNot(filter));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPushDownNotLeafNot()
|
||||
{
|
||||
final Filter filter = FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "1"),
|
||||
FilterTestUtils.selector("col2", "2"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col3", "3"))
|
||||
);
|
||||
Assert.assertEquals(filter, Filters.pushDownNot(filter));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFlatten()
|
||||
{
|
||||
final Filter filter = FilterTestUtils.and(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "1"),
|
||||
FilterTestUtils.selector("col2", "2")
|
||||
)
|
||||
),
|
||||
FilterTestUtils.selector("col3", "3")
|
||||
);
|
||||
final Filter expected = FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "1"),
|
||||
FilterTestUtils.selector("col2", "2"),
|
||||
FilterTestUtils.selector("col3", "3")
|
||||
);
|
||||
Assert.assertEquals(expected, Filters.flatten(filter));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFlattenUnflattenable()
|
||||
{
|
||||
final Filter filter = FilterTestUtils.and(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "1"),
|
||||
FilterTestUtils.selector("col2", "2")
|
||||
),
|
||||
FilterTestUtils.selector("col3", "3")
|
||||
);
|
||||
Assert.assertEquals(filter, Filters.flatten(filter));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToCNFWithMuchReducibleFilter()
|
||||
{
|
||||
final Filter muchReducible = FilterTestUtils.and(
|
||||
// should be flattened
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.and(FilterTestUtils.selector("col1", "val1"))
|
||||
)
|
||||
),
|
||||
// should be flattened
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.and(FilterTestUtils.selector("col1", "val1"))
|
||||
)
|
||||
),
|
||||
// should be flattened
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.or(FilterTestUtils.selector("col1", "val1"))
|
||||
)
|
||||
),
|
||||
// should eliminate duplicate filters
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val2"),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val2")
|
||||
),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col2", "val2"),
|
||||
FilterTestUtils.selector("col1", "val1")
|
||||
)
|
||||
)
|
||||
);
|
||||
final Filter expected = FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val2")
|
||||
);
|
||||
Assert.assertEquals(expected, Filters.toCNF(muchReducible));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToCNFWithComplexFilterIncludingNotAndOr()
|
||||
{
|
||||
final Filter filter = FilterTestUtils.and(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val2")
|
||||
),
|
||||
FilterTestUtils.not(
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col4", "val4"),
|
||||
FilterTestUtils.selector("col5", "val5")
|
||||
)
|
||||
)
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.not(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col2", "val2"),
|
||||
FilterTestUtils.selector("col4", "val4"),
|
||||
FilterTestUtils.selector("col5", "val5")
|
||||
)
|
||||
),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col3", "val3")
|
||||
)
|
||||
),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val22"), // selecting different value
|
||||
FilterTestUtils.selector("col3", "val3")
|
||||
),
|
||||
FilterTestUtils.not(
|
||||
FilterTestUtils.selector("col1", "val11")
|
||||
)
|
||||
),
|
||||
FilterTestUtils.and(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val22"),
|
||||
FilterTestUtils.selector("col3", "val3")
|
||||
),
|
||||
FilterTestUtils.not(
|
||||
FilterTestUtils.selector("col1", "val11") // selecting different value
|
||||
)
|
||||
)
|
||||
);
|
||||
final Filter expected = FilterTestUtils.and(
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.selector("col2", "val22"),
|
||||
FilterTestUtils.selector("col3", "val3")
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col2", "val2"))
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col2", "val2")),
|
||||
FilterTestUtils.selector("col3", "val3")
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4"))
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col3", "val3"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4"))
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col3", "val3"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
|
||||
),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col1", "val11")),
|
||||
// The below OR filter could be eliminated because this filter also has
|
||||
// (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)).
|
||||
// The reduction process would be
|
||||
// (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)) && (col1 = val1 || ~(col4 = val4) || ~(col5 = val5))
|
||||
// => (col1 = val1 && ~(col4 = val4) || ~(col5 = val5)) && (col1 = val1 || ~(col4 = val4) || ~(col5 = val5))
|
||||
// => (col1 = val1 && ~(col4 = val4) || ~(col5 = val5))
|
||||
// => (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)).
|
||||
// However, we don't have this reduction now, so we have a filter in a suboptimized CNF.
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col1", "val1"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4")),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
|
||||
),
|
||||
FilterTestUtils.or(
|
||||
FilterTestUtils.selector("col2", "val2"),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4")),
|
||||
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
|
||||
)
|
||||
);
|
||||
Assert.assertEquals(expected, Filters.toCNF(filter));
|
||||
}
|
||||
|
||||
private static BitmapIndex getBitmapIndex(final List<ImmutableBitmap> bitmapList)
|
||||
{
|
||||
return new BitmapIndex()
|
||||
|
Loading…
x
Reference in New Issue
Block a user