More optimize CNF conversion of filters (#9634)

* More optimize CNF conversion of filters

* update license

* fix build

* checkstyle

* remove unnecessary code

* split helper

* license

* checkstyle

* add comments on cnf conversion
This commit is contained in:
Jihoon Son 2020-04-08 21:31:17 -07:00 committed by GitHub
parent 08851c0198
commit a6790ff22a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 1100 additions and 384 deletions

View File

@ -215,7 +215,7 @@ Apache License version 2.0
SOURCE/JAVA-CORE
This product contains conjunctive normal form conversion code, a variance aggregator algorithm, and Bloom filter
adapted from Apache Hive.
* processing/src/main/java/org/apache/druid/segment/filter/Filters.java
* processing/src/main/java/org/apache/druid/segment/filter/cnf/HiveCnfHelper.java
* extensions-core/stats/src/main/java/io/druid/query/aggregation/variance/VarianceAggregatorCollector.java
* extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomKFilter.java
@ -224,6 +224,7 @@ SOURCE/JAVA-CORE
This product contains SQL query planning code adapted from Apache Calcite.
* sql/src/main/java/org/apache/druid/sql/calcite/
* processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java
This product contains Kerberos authentication code adapted from Apache Hadoop.
* extensions-core/druid-kerberos/src/main/java/org/apache/druid/security/kerberos/

View File

@ -377,7 +377,7 @@ public class FilterPartitionBenchmark
Filter orFilter = new OrFilter(Arrays.asList(filter, filter2));
StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
Sequence<Cursor> cursors = makeCursors(sa, Filters.toCNF(orFilter));
Sequence<Cursor> cursors = makeCursors(sa, Filters.toCnf(orFilter));
readCursors(cursors, blackhole);
}
@ -451,7 +451,7 @@ public class FilterPartitionBenchmark
);
StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
Sequence<Cursor> cursors = makeCursors(sa, Filters.toCNF(dimFilter3.toFilter()));
Sequence<Cursor> cursors = makeCursors(sa, Filters.toCnf(dimFilter3.toFilter()));
readCursors(cursors, blackhole);
}

View File

@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.filter;
import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.filter.BitmapIndexSelector;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.segment.ColumnSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import java.util.Collections;
import java.util.Set;
public class FalseFilter implements Filter
{
private static final FalseFilter INSTANCE = new FalseFilter();
public static FalseFilter instance()
{
return INSTANCE;
}
private FalseFilter()
{
}
@Override
public <T> T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory<T> bitmapResultFactory)
{
return bitmapResultFactory.wrapAllFalse(Filters.allFalse(selector));
}
@Override
public double estimateSelectivity(BitmapIndexSelector indexSelector)
{
return 0;
}
@Override
public ValueMatcher makeMatcher(ColumnSelectorFactory factory)
{
return FalseValueMatcher.instance();
}
@Override
public boolean supportsBitmapIndex(BitmapIndexSelector selector)
{
return true;
}
@Override
public boolean shouldUseBitmapIndex(BitmapIndexSelector selector)
{
return true;
}
@Override
public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector)
{
return true;
}
@Override
public Set<String> getRequiredColumns()
{
return Collections.emptySet();
}
@Override
public String toString()
{
return "false";
}
}

View File

@ -19,7 +19,6 @@
package org.apache.druid.segment.filter;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
@ -30,7 +29,6 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.Query;
import org.apache.druid.query.filter.BitmapIndexSelector;
import org.apache.druid.query.filter.BooleanFilter;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.query.filter.Filter;
@ -44,12 +42,12 @@ import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.filter.cnf.CalciteCnfHelper;
import org.apache.druid.segment.filter.cnf.HiveCnfHelper;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
@ -426,175 +424,29 @@ public class Filters
return null;
}
boolean useCNF = query.getContextBoolean(CTX_KEY_USE_FILTER_CNF, false);
return useCNF ? toCNF(filter) : filter;
return useCNF ? Filters.toCnf(filter) : filter;
}
public static Filter toCNF(Filter current)
public static Filter toCnf(Filter current)
{
current = pushDownNot(current);
current = flatten(current);
current = convertToCNFInternal(current);
current = flatten(current);
// Push down NOT filters to leaves if possible to remove NOT on NOT filters and reduce hierarchy.
// ex) ~(a OR ~b) => ~a AND b
current = HiveCnfHelper.pushDownNot(current);
// Flatten nested AND and OR filters if possible.
// ex) a AND (b AND c) => a AND b AND c
current = HiveCnfHelper.flatten(current);
// Pull up AND filters first to convert the filter into a conjunctive form.
// It is important to pull before CNF conversion to not create a huge CNF.
// ex) (a AND b) OR (a AND c AND d) => a AND (b OR (c AND d))
current = CalciteCnfHelper.pull(current);
// Convert filter to CNF.
// a AND (b OR (c AND d)) => a AND (b OR c) AND (b OR d)
current = HiveCnfHelper.convertToCnf(current);
// Flatten again to remove any flattenable nested AND or OR filters created during CNF conversion.
current = HiveCnfHelper.flatten(current);
return current;
}
// CNF conversion functions were adapted from Apache Hive, see:
// https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
@VisibleForTesting
static Filter pushDownNot(Filter current)
{
if (current instanceof NotFilter) {
Filter child = ((NotFilter) current).getBaseFilter();
if (child instanceof NotFilter) {
return pushDownNot(((NotFilter) child).getBaseFilter());
}
if (child instanceof AndFilter) {
Set<Filter> children = new HashSet<>();
for (Filter grandChild : ((AndFilter) child).getFilters()) {
children.add(pushDownNot(new NotFilter(grandChild)));
}
return new OrFilter(children);
}
if (child instanceof OrFilter) {
Set<Filter> children = new HashSet<>();
for (Filter grandChild : ((OrFilter) child).getFilters()) {
children.add(pushDownNot(new NotFilter(grandChild)));
}
return new AndFilter(children);
}
}
if (current instanceof AndFilter) {
Set<Filter> children = new HashSet<>();
for (Filter child : ((AndFilter) current).getFilters()) {
children.add(pushDownNot(child));
}
return new AndFilter(children);
}
if (current instanceof OrFilter) {
Set<Filter> children = new HashSet<>();
for (Filter child : ((OrFilter) current).getFilters()) {
children.add(pushDownNot(child));
}
return new OrFilter(children);
}
return current;
}
// CNF conversion functions were adapted from Apache Hive, see:
// https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
private static Filter convertToCNFInternal(Filter current)
{
if (current instanceof NotFilter) {
return new NotFilter(convertToCNFInternal(((NotFilter) current).getBaseFilter()));
}
if (current instanceof AndFilter) {
Set<Filter> children = new HashSet<>();
for (Filter child : ((AndFilter) current).getFilters()) {
children.add(convertToCNFInternal(child));
}
return new AndFilter(children);
}
if (current instanceof OrFilter) {
// a list of leaves that weren't under AND expressions
List<Filter> nonAndList = new ArrayList<Filter>();
// a list of AND expressions that we need to distribute
List<Filter> andList = new ArrayList<Filter>();
for (Filter child : ((OrFilter) current).getFilters()) {
if (child instanceof AndFilter) {
andList.add(child);
} else if (child instanceof OrFilter) {
// pull apart the kids of the OR expression
nonAndList.addAll(((OrFilter) child).getFilters());
} else {
nonAndList.add(child);
}
}
if (!andList.isEmpty()) {
Set<Filter> result = new HashSet<>();
generateAllCombinations(result, andList, nonAndList);
return new AndFilter(result);
}
}
return current;
}
// CNF conversion functions were adapted from Apache Hive, see:
// https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
@VisibleForTesting
static Filter flatten(Filter root)
{
if (root instanceof BooleanFilter) {
List<Filter> children = new ArrayList<>(((BooleanFilter) root).getFilters());
// iterate through the index, so that if we add more children,
// they don't get re-visited
for (int i = 0; i < children.size(); ++i) {
Filter child = flatten(children.get(i));
// do we need to flatten?
if (child.getClass() == root.getClass() && !(child instanceof NotFilter)) {
boolean first = true;
Set<Filter> grandKids = ((BooleanFilter) child).getFilters();
for (Filter grandkid : grandKids) {
// for the first grandkid replace the original parent
if (first) {
first = false;
children.set(i, grandkid);
} else {
children.add(++i, grandkid);
}
}
} else {
children.set(i, child);
}
}
// if we have a singleton AND or OR, just return the child
if (children.size() == 1 && (root instanceof AndFilter || root instanceof OrFilter)) {
return children.get(0);
}
if (root instanceof AndFilter) {
return new AndFilter(children);
} else if (root instanceof OrFilter) {
return new OrFilter(children);
}
}
return root;
}
// CNF conversion functions were adapted from Apache Hive, see:
// https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
private static void generateAllCombinations(
Set<Filter> result,
List<Filter> andList,
List<Filter> nonAndList
)
{
Set<Filter> children = ((AndFilter) andList.get(0)).getFilters();
if (result.isEmpty()) {
for (Filter child : children) {
Set<Filter> a = new HashSet<>(nonAndList);
a.add(child);
result.add(new OrFilter(a));
}
} else {
List<Filter> work = new ArrayList<>(result);
result.clear();
for (Filter child : children) {
for (Filter or : work) {
Set<Filter> a = new HashSet<>((((OrFilter) or).getFilters()));
a.add(child);
result.add(new OrFilter(a));
}
}
}
if (andList.size() > 1) {
generateAllCombinations(result, andList.subList(1, andList.size()), nonAndList);
}
}
/**
* This method provides a "standard" implementation of {@link Filter#shouldUseBitmapIndex(BitmapIndexSelector)} which takes
* a {@link Filter}, a {@link BitmapIndexSelector}, and {@link FilterTuning} to determine if:

View File

@ -0,0 +1,301 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.filter.cnf;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.segment.filter.AndFilter;
import org.apache.druid.segment.filter.FalseFilter;
import org.apache.druid.segment.filter.OrFilter;
import org.apache.druid.segment.filter.TrueFilter;
import javax.annotation.Nonnull;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
/**
* All functions in this class were basically adopted from Apache Calcite and modified to use them in Druid.
* See https://github.com/apache/calcite/blob/branch-1.21/core/src/main/java/org/apache/calcite/rex/RexUtil.java#L1615
* for original implementation.
*/
public class CalciteCnfHelper
{
public static Filter pull(Filter rex)
{
final Set<Filter> operands;
if (rex instanceof AndFilter) {
operands = ((AndFilter) rex).getFilters();
return and(pullList(operands));
} else if (rex instanceof OrFilter) {
operands = ((OrFilter) rex).getFilters();
final Map<Filter, Filter> factors = commonFactors(operands);
if (factors.isEmpty()) {
return or(operands);
}
final List<Filter> list = new ArrayList<>();
for (Filter operand : operands) {
Filter removed = removeFactor(factors, operand);
if (removed != null) {
list.add(removed);
}
}
if (list.isEmpty()) {
return and(factors.values());
} else if (list.size() == 1) {
return and(Iterables.concat(factors.values(), ImmutableList.of(list.get(0))));
} else {
return and(Iterables.concat(factors.values(), ImmutableList.of(or(list))));
}
} else {
return rex;
}
}
private static List<Filter> pullList(Set<Filter> nodes)
{
final List<Filter> list = new ArrayList<>();
for (Filter node : nodes) {
Filter pulled = pull(node);
if (pulled instanceof AndFilter) {
list.addAll(((AndFilter) pulled).getFilters());
} else {
list.add(pulled);
}
}
return list;
}
private static Map<Filter, Filter> commonFactors(Set<Filter> nodes)
{
final Map<Filter, Filter> map = new HashMap<>();
int i = 0;
for (Filter node : nodes) {
if (i++ == 0) {
for (Filter conjunction : conjunctions(node)) {
map.put(conjunction, conjunction);
}
} else {
map.keySet().retainAll(conjunctions(node));
}
}
return map;
}
private static Filter removeFactor(Map<Filter, Filter> factors, Filter node)
{
List<Filter> list = new ArrayList<>();
for (Filter operand : conjunctions(node)) {
if (!factors.containsKey(operand)) {
list.add(operand);
}
}
return and(list);
}
private static Filter and(Iterable<? extends Filter> nodes)
{
return composeConjunction(nodes);
}
private static Filter or(Iterable<? extends Filter> nodes)
{
return composeDisjunction(nodes);
}
/** As {@link #composeConjunction(Iterable, boolean)} but never
* returns null. */
public static @Nonnull Filter composeConjunction(Iterable<? extends Filter> nodes)
{
final Filter e = composeConjunction(nodes, false);
return Objects.requireNonNull(e);
}
/**
* Converts a collection of expressions into an AND.
* If there are zero expressions, returns TRUE.
* If there is one expression, returns just that expression.
* If any of the expressions are FALSE, returns FALSE.
* Removes expressions that always evaluate to TRUE.
* Returns null only if {@code nullOnEmpty} and expression is TRUE.
*/
public static Filter composeConjunction(Iterable<? extends Filter> nodes, boolean nullOnEmpty)
{
ImmutableList<Filter> list = flattenAnd(nodes);
switch (list.size()) {
case 0:
return nullOnEmpty
? null
: TrueFilter.instance();
case 1:
return list.get(0);
default:
return new AndFilter(list);
}
}
/** Flattens a list of AND nodes.
*
* <p>Treats null nodes as literal TRUE (i.e. ignores them). */
public static ImmutableList<Filter> flattenAnd(Iterable<? extends Filter> nodes)
{
if (nodes instanceof Collection && ((Collection) nodes).isEmpty()) {
// Optimize common case
return ImmutableList.of();
}
final ImmutableList.Builder<Filter> builder = ImmutableList.builder();
final Set<Filter> set = new HashSet<>(); // to eliminate duplicates
for (Filter node : nodes) {
if (node != null) {
addAnd(builder, set, node);
}
}
return builder.build();
}
private static void addAnd(ImmutableList.Builder<Filter> builder, Set<Filter> digests, Filter node)
{
if (node instanceof AndFilter) {
for (Filter operand : ((AndFilter) node).getFilters()) {
addAnd(builder, digests, operand);
}
} else {
if (!(node instanceof TrueFilter) && digests.add(node)) {
builder.add(node);
}
}
}
/**
* Converts a collection of expressions into an OR.
* If there are zero expressions, returns FALSE.
* If there is one expression, returns just that expression.
* If any of the expressions are TRUE, returns TRUE.
* Removes expressions that always evaluate to FALSE.
* Flattens expressions that are ORs.
*/
@Nonnull public static Filter composeDisjunction(Iterable<? extends Filter> nodes)
{
final Filter e = composeDisjunction(nodes, false);
return Objects.requireNonNull(e);
}
/**
* Converts a collection of expressions into an OR,
* optionally returning null if the list is empty.
*/
public static Filter composeDisjunction(Iterable<? extends Filter> nodes, boolean nullOnEmpty)
{
ImmutableList<Filter> list = flattenOr(nodes);
switch (list.size()) {
case 0:
return nullOnEmpty ? null : FalseFilter.instance();
case 1:
return list.get(0);
default:
if (containsTrue(list)) {
return TrueFilter.instance();
}
return new OrFilter(list);
}
}
/** Flattens a list of OR nodes. */
public static ImmutableList<Filter> flattenOr(Iterable<? extends Filter> nodes)
{
if (nodes instanceof Collection && ((Collection) nodes).isEmpty()) {
// Optimize common case
return ImmutableList.of();
}
final ImmutableList.Builder<Filter> builder = ImmutableList.builder();
final Set<Filter> set = new HashSet<>(); // to eliminate duplicates
for (Filter node : nodes) {
addOr(builder, set, node);
}
return builder.build();
}
private static void addOr(ImmutableList.Builder<Filter> builder, Set<Filter> set, Filter node)
{
if (node instanceof OrFilter) {
for (Filter operand : ((OrFilter) node).getFilters()) {
addOr(builder, set, operand);
}
} else {
if (set.add(node)) {
builder.add(node);
}
}
}
/**
* Returns a condition decomposed by AND.
*
* <p>For example, {@code conjunctions(TRUE)} returns the empty list;
* {@code conjunctions(FALSE)} returns list {@code {FALSE}}.</p>
*/
public static List<Filter> conjunctions(Filter rexPredicate)
{
final List<Filter> list = new ArrayList<>();
decomposeConjunction(rexPredicate, list);
return list;
}
/**
* Decomposes a predicate into a list of expressions that are AND'ed
* together.
*
* @param rexPredicate predicate to be analyzed
* @param rexList list of decomposed RexNodes
*/
public static void decomposeConjunction(Filter rexPredicate, List<Filter> rexList)
{
if (rexPredicate == null || rexPredicate instanceof TrueFilter) {
return;
}
if (rexPredicate instanceof AndFilter) {
for (Filter operand : ((AndFilter) rexPredicate).getFilters()) {
decomposeConjunction(operand, rexList);
}
} else {
rexList.add(rexPredicate);
}
}
private static boolean containsTrue(Iterable<Filter> nodes)
{
for (Filter node : nodes) {
if (node instanceof TrueFilter) {
return true;
}
}
return false;
}
private CalciteCnfHelper()
{
}
}

View File

@ -0,0 +1,190 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.filter.cnf;
import org.apache.druid.query.filter.BooleanFilter;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.segment.filter.AndFilter;
import org.apache.druid.segment.filter.NotFilter;
import org.apache.druid.segment.filter.OrFilter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
* All functions in this class were basically adopted from Apache Hive and modified to use them in Druid.
* See https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
* for original implementation.
*/
public class HiveCnfHelper
{
public static Filter pushDownNot(Filter current)
{
if (current instanceof NotFilter) {
Filter child = ((NotFilter) current).getBaseFilter();
if (child instanceof NotFilter) {
return pushDownNot(((NotFilter) child).getBaseFilter());
}
if (child instanceof AndFilter) {
Set<Filter> children = new HashSet<>();
for (Filter grandChild : ((AndFilter) child).getFilters()) {
children.add(pushDownNot(new NotFilter(grandChild)));
}
return new OrFilter(children);
}
if (child instanceof OrFilter) {
Set<Filter> children = new HashSet<>();
for (Filter grandChild : ((OrFilter) child).getFilters()) {
children.add(pushDownNot(new NotFilter(grandChild)));
}
return new AndFilter(children);
}
}
if (current instanceof AndFilter) {
Set<Filter> children = new HashSet<>();
for (Filter child : ((AndFilter) current).getFilters()) {
children.add(pushDownNot(child));
}
return new AndFilter(children);
}
if (current instanceof OrFilter) {
Set<Filter> children = new HashSet<>();
for (Filter child : ((OrFilter) current).getFilters()) {
children.add(pushDownNot(child));
}
return new OrFilter(children);
}
return current;
}
public static Filter convertToCnf(Filter current)
{
if (current instanceof NotFilter) {
return new NotFilter(convertToCnf(((NotFilter) current).getBaseFilter()));
}
if (current instanceof AndFilter) {
Set<Filter> children = new HashSet<>();
for (Filter child : ((AndFilter) current).getFilters()) {
children.add(convertToCnf(child));
}
return new AndFilter(children);
}
if (current instanceof OrFilter) {
// a list of leaves that weren't under AND expressions
List<Filter> nonAndList = new ArrayList<Filter>();
// a list of AND expressions that we need to distribute
List<Filter> andList = new ArrayList<Filter>();
for (Filter child : ((OrFilter) current).getFilters()) {
if (child instanceof AndFilter) {
andList.add(child);
} else if (child instanceof OrFilter) {
// pull apart the kids of the OR expression
nonAndList.addAll(((OrFilter) child).getFilters());
} else {
nonAndList.add(child);
}
}
if (!andList.isEmpty()) {
Set<Filter> result = new HashSet<>();
generateAllCombinations(result, andList, nonAndList);
return new AndFilter(result);
}
}
return current;
}
public static Filter flatten(Filter root)
{
if (root instanceof BooleanFilter) {
List<Filter> children = new ArrayList<>(((BooleanFilter) root).getFilters());
// iterate through the index, so that if we add more children,
// they don't get re-visited
for (int i = 0; i < children.size(); ++i) {
Filter child = flatten(children.get(i));
// do we need to flatten?
if (child.getClass() == root.getClass() && !(child instanceof NotFilter)) {
boolean first = true;
Set<Filter> grandKids = ((BooleanFilter) child).getFilters();
for (Filter grandkid : grandKids) {
// for the first grandkid replace the original parent
if (first) {
first = false;
children.set(i, grandkid);
} else {
children.add(++i, grandkid);
}
}
} else {
children.set(i, child);
}
}
// if we have a singleton AND or OR, just return the child
if (children.size() == 1 && (root instanceof AndFilter || root instanceof OrFilter)) {
return children.get(0);
}
if (root instanceof AndFilter) {
return new AndFilter(children);
} else if (root instanceof OrFilter) {
return new OrFilter(children);
}
}
return root;
}
// A helper function adapted from Apache Hive, see:
// https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
private static void generateAllCombinations(
Set<Filter> result,
List<Filter> andList,
List<Filter> nonAndList
)
{
Set<Filter> children = ((AndFilter) andList.get(0)).getFilters();
if (result.isEmpty()) {
for (Filter child : children) {
Set<Filter> a = new HashSet<>(nonAndList);
a.add(child);
result.add(new OrFilter(a));
}
} else {
List<Filter> work = new ArrayList<>(result);
result.clear();
for (Filter child : children) {
for (Filter or : work) {
Set<Filter> a = new HashSet<>((((OrFilter) or).getFilters()));
a.add(child);
result.add(new OrFilter(a));
}
}
}
if (andList.size() > 1) {
generateAllCombinations(result, andList.subList(1, andList.size()), nonAndList);
}
}
private HiveCnfHelper()
{
}
}

View File

@ -128,7 +128,7 @@ public class JoinFilterAnalyzer
);
}
Filter normalizedFilter = Filters.toCNF(originalFilter);
Filter normalizedFilter = Filters.toCnf(originalFilter);
// List of candidates for pushdown
// CNF normalization will generate either

View File

@ -336,7 +336,7 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest
final DimFilter maybeOptimized = optimize ? dimFilter.optimize() : dimFilter;
final Filter filter = maybeOptimized.toFilter();
return cnf ? Filters.toCNF(filter) : filter;
return cnf ? Filters.toCnf(filter) : filter;
}
private DimFilter maybeOptimize(final DimFilter dimFilter)

View File

@ -0,0 +1,490 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.filter;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.filter.cnf.CalciteCnfHelper;
import org.apache.druid.segment.filter.cnf.HiveCnfHelper;
import org.junit.Assert;
import org.junit.Test;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
public class FilterCnfConversionTest
{
@Test
public void testPushDownNot()
{
final Filter filter = FilterTestUtils.not(
FilterTestUtils.and(
FilterTestUtils.selector("col1", "1"),
FilterTestUtils.selector("col2", "2"),
FilterTestUtils.not(FilterTestUtils.selector("col3", "3"))
)
);
final Filter expected = FilterTestUtils.or(
FilterTestUtils.not(FilterTestUtils.selector("col1", "1")),
FilterTestUtils.not(FilterTestUtils.selector("col2", "2")),
FilterTestUtils.selector("col3", "3")
);
final Filter pushedDown = HiveCnfHelper.pushDownNot(filter);
assertFilter(filter, expected, pushedDown);
}
@Test
public void testPushDownNotLeafNot()
{
final Filter filter = FilterTestUtils.and(
FilterTestUtils.selector("col1", "1"),
FilterTestUtils.selector("col2", "2"),
FilterTestUtils.not(FilterTestUtils.selector("col3", "3"))
);
final Filter pushedDown = HiveCnfHelper.pushDownNot(filter);
assertFilter(filter, filter, pushedDown);
}
@Test
public void testFlatten()
{
final Filter filter = FilterTestUtils.and(
FilterTestUtils.and(
FilterTestUtils.and(
FilterTestUtils.selector("col1", "1"),
FilterTestUtils.selector("col2", "2")
)
),
FilterTestUtils.selector("col3", "3")
);
final Filter expected = FilterTestUtils.and(
FilterTestUtils.selector("col1", "1"),
FilterTestUtils.selector("col2", "2"),
FilterTestUtils.selector("col3", "3")
);
final Filter flattened = HiveCnfHelper.flatten(filter);
assertFilter(filter, expected, flattened);
}
@Test
public void testFlattenUnflattenable()
{
final Filter filter = FilterTestUtils.and(
FilterTestUtils.or(
FilterTestUtils.selector("col1", "1"),
FilterTestUtils.selector("col2", "2")
),
FilterTestUtils.selector("col3", "3")
);
final Filter flattened = HiveCnfHelper.flatten(filter);
assertFilter(filter, filter, flattened);
}
@Test
public void testToCnfWithMuchReducibleFilter()
{
final Filter muchReducible = FilterTestUtils.and(
// should be flattened
FilterTestUtils.and(
FilterTestUtils.and(
FilterTestUtils.and(FilterTestUtils.selector("col1", "val1"))
)
),
// should be flattened
FilterTestUtils.and(
FilterTestUtils.or(
FilterTestUtils.and(FilterTestUtils.selector("col1", "val1"))
)
),
// should be flattened
FilterTestUtils.or(
FilterTestUtils.and(
FilterTestUtils.or(FilterTestUtils.selector("col1", "val1"))
)
),
// should eliminate duplicate filters
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val2"),
FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val2")
),
FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.and(
FilterTestUtils.selector("col2", "val2"),
FilterTestUtils.selector("col1", "val1")
)
)
);
final Filter expected = FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val2")
);
final Filter cnf = Filters.toCnf(muchReducible);
assertFilter(muchReducible, expected, cnf);
}
@Test
public void testToCnfWithComplexFilterIncludingNotAndOr()
{
final Filter filter = FilterTestUtils.and(
FilterTestUtils.or(
FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val2")
),
FilterTestUtils.not(
FilterTestUtils.and(
FilterTestUtils.selector("col4", "val4"),
FilterTestUtils.selector("col5", "val5")
)
)
),
FilterTestUtils.or(
FilterTestUtils.not(
FilterTestUtils.or(
FilterTestUtils.selector("col2", "val2"),
FilterTestUtils.selector("col4", "val4"),
FilterTestUtils.selector("col5", "val5")
)
),
FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col3", "val3")
)
),
FilterTestUtils.and(
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val22"), // selecting different value
FilterTestUtils.selector("col3", "val3")
),
FilterTestUtils.not(
FilterTestUtils.selector("col1", "val11")
)
),
FilterTestUtils.and(
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val22"),
FilterTestUtils.selector("col3", "val3")
),
FilterTestUtils.not(
FilterTestUtils.selector("col1", "val11") // selecting different value
)
)
);
final Filter expected = FilterTestUtils.and(
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val22"),
FilterTestUtils.selector("col3", "val3")
),
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.not(FilterTestUtils.selector("col2", "val2"))
),
FilterTestUtils.or(
FilterTestUtils.not(FilterTestUtils.selector("col2", "val2")),
FilterTestUtils.selector("col3", "val3")
),
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4"))
),
FilterTestUtils.or(
FilterTestUtils.selector("col3", "val3"),
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4"))
),
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
),
FilterTestUtils.or(
FilterTestUtils.selector("col3", "val3"),
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
),
FilterTestUtils.not(FilterTestUtils.selector("col1", "val11")),
// The below OR filter could be eliminated because this filter also has
// (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)).
// The reduction process would be
// (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)) && (col1 = val1 || ~(col4 = val4) || ~(col5 = val5))
// => (col1 = val1 && ~(col4 = val4) || ~(col5 = val5)) && (col1 = val1 || ~(col4 = val4) || ~(col5 = val5))
// => (col1 = val1 && ~(col4 = val4) || ~(col5 = val5))
// => (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)).
// However, we don't have this reduction now, so we have a filter in a suboptimized CNF.
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4")),
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
),
FilterTestUtils.or(
FilterTestUtils.selector("col2", "val2"),
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4")),
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
)
);
final Filter cnf = Filters.toCnf(filter);
assertFilter(filter, expected, cnf);
}
@Test
public void testToCnfCollapsibleBigFilter()
{
Set<Filter> ands = new HashSet<>();
Set<Filter> ors = new HashSet<>();
for (int i = 0; i < 12; i++) {
ands.add(
FilterTestUtils.and(
FilterTestUtils.selector("col3", "val3"),
FilterTestUtils.selector("col4", "val4"),
FilterTestUtils.selector("col5", StringUtils.format("val%d", i))
)
);
ors.add(FilterTestUtils.selector("col5", StringUtils.format("val%d", i)));
}
final Filter bigFilter = FilterTestUtils.and(
new OrFilter(ands),
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val2")
);
final Filter expectedCnf = FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val2"),
FilterTestUtils.selector("col3", "val3"),
FilterTestUtils.selector("col4", "val4"),
new OrFilter(ors)
);
final Filter cnf = Filters.toCnf(bigFilter);
assertFilter(bigFilter, expectedCnf, cnf);
}
@Test
public void testPullOrOnlyFilter()
{
final Filter filter = FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val2"),
FilterTestUtils.selector("col3", "val3")
);
assertFilter(filter, filter, CalciteCnfHelper.pull(filter));
}
@Test
public void testPullNotPullableFilter()
{
final Filter filter = FilterTestUtils.or(
FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val2")
),
FilterTestUtils.and(
FilterTestUtils.selector("col3", "val3"),
FilterTestUtils.selector("col4", "val4")
),
FilterTestUtils.and(
FilterTestUtils.selector("col5", "val5"),
FilterTestUtils.selector("col6", "val6")
),
FilterTestUtils.selector("col7", "val7")
);
assertFilter(filter, filter, CalciteCnfHelper.pull(filter));
}
@Test
public void testToCnfFilterThatPullCannotConvertToCnfProperly()
{
final Filter filter = FilterTestUtils.or(
FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val2")
),
FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col3", "val3"),
FilterTestUtils.selector("col4", "val4")
)
);
final Filter expectedCnf = FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.or(
FilterTestUtils.selector("col2", "val2"),
FilterTestUtils.selector("col3", "val3")
),
FilterTestUtils.or(
FilterTestUtils.selector("col2", "val2"),
FilterTestUtils.selector("col4", "val4")
)
);
assertFilter(filter, expectedCnf, Filters.toCnf(filter));
}
private void assertFilter(Filter original, Filter expectedConverted, Filter actualConverted)
{
assertEquivalent(original, expectedConverted);
Assert.assertEquals(expectedConverted, actualConverted);
}
/**
* Assert the given two filters are equivalent by comparing their truth table.
*/
private void assertEquivalent(Filter f1, Filter f2)
{
final Set<SelectorFilter> s1 = searchForSelectors(f1);
final Set<SelectorFilter> s2 = searchForSelectors(f2);
Assert.assertEquals(s1, s2);
// Compare truth table
final List<SelectorFilter> selectorFilters = new ArrayList<>(s1);
List<Map<SelectorFilter, Boolean>> truthValues = populate(selectorFilters, selectorFilters.size() - 1);
for (Map<SelectorFilter, Boolean> truthValue : truthValues) {
Assert.assertEquals(evaluateFilterWith(f1, truthValue), evaluateFilterWith(f2, truthValue));
}
}
/**
* Recursively populate all permutations of truth values.
*
* @param selectorFilters selector filters
* @param cursor the offset to the current selectFilter to pupulate truth values
*
* @return a list of truth values populated up to the cursor
*/
private List<Map<SelectorFilter, Boolean>> populate(List<SelectorFilter> selectorFilters, int cursor)
{
// populateFalse and populateTrue will include the false and the true value
// for the current selectFilter, respectively.
final List<Map<SelectorFilter, Boolean>> populateFalse;
final List<Map<SelectorFilter, Boolean>> populateTrue;
if (cursor == 0) {
Map<SelectorFilter, Boolean> mapForFalse = new HashMap<>();
Map<SelectorFilter, Boolean> mapForTrue = new HashMap<>();
for (SelectorFilter eachFilter : selectorFilters) {
mapForFalse.put(eachFilter, false);
mapForTrue.put(eachFilter, false);
}
populateFalse = new ArrayList<>();
populateFalse.add(mapForFalse);
populateTrue = new ArrayList<>();
populateTrue.add(mapForTrue);
} else {
final List<Map<SelectorFilter, Boolean>> populated = populate(selectorFilters, cursor - 1);
populateFalse = new ArrayList<>(populated.size());
populateTrue = new ArrayList<>(populated.size());
for (Map<SelectorFilter, Boolean> eachMap : populated) {
populateFalse.add(new HashMap<>(eachMap));
populateTrue.add(new HashMap<>(eachMap));
}
}
for (Map<SelectorFilter, Boolean> eachMap : populateTrue) {
eachMap.put(selectorFilters.get(cursor), true);
}
final List<Map<SelectorFilter, Boolean>> allPopulated = new ArrayList<>(populateFalse);
allPopulated.addAll(populateTrue);
return allPopulated;
}
private Set<SelectorFilter> searchForSelectors(Filter filter)
{
Set<SelectorFilter> found = new HashSet<>();
visitSelectorFilters(filter, selectorFilter -> {
found.add(selectorFilter);
return selectorFilter;
});
return found;
}
private boolean evaluateFilterWith(Filter filter, Map<SelectorFilter, Boolean> values)
{
Filter rewrittenFilter = visitSelectorFilters(filter, selectorFilter -> {
Boolean truth = values.get(selectorFilter);
if (truth == null) {
throw new ISE("Can't find truth value for selectorFilter[%s]", selectorFilter);
}
return truth ? TrueFilter.instance() : FalseFilter.instance();
});
return rewrittenFilter.makeMatcher(
new ColumnSelectorFactory()
{
@Override
public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
{
return null;
}
@Override
public ColumnValueSelector makeColumnValueSelector(String columnName)
{
return null;
}
@Nullable
@Override
public ColumnCapabilities getColumnCapabilities(String column)
{
return null;
}
}
).matches();
}
private Filter visitSelectorFilters(Filter filter, Function<SelectorFilter, Filter> visitAction)
{
if (filter instanceof AndFilter) {
Set<Filter> newChildren = new HashSet<>();
for (Filter child : ((AndFilter) filter).getFilters()) {
newChildren.add(visitSelectorFilters(child, visitAction));
}
return new AndFilter(newChildren);
} else if (filter instanceof OrFilter) {
Set<Filter> newChildren = new HashSet<>();
for (Filter child : ((OrFilter) filter).getFilters()) {
newChildren.add(visitSelectorFilters(child, visitAction));
}
return new OrFilter(newChildren);
} else if (filter instanceof NotFilter) {
Filter child = ((NotFilter) filter).getBaseFilter();
return new NotFilter(visitSelectorFilters(child, visitAction));
} else if (filter instanceof SelectorFilter) {
return visitAction.apply((SelectorFilter) filter);
}
return filter;
}
}

View File

@ -621,7 +621,7 @@ public class FilterPartitionTest extends BaseFilterTest
);
Filter filter1 = dimFilter1.toFilter();
Filter filter1CNF = Filters.toCNF(filter1);
Filter filter1CNF = Filters.toCnf(filter1);
Assert.assertEquals(AndFilter.class, filter1CNF.getClass());
Assert.assertEquals(2, ((AndFilter) filter1CNF).getFilters().size());
@ -675,7 +675,7 @@ public class FilterPartitionTest extends BaseFilterTest
);
Filter filter1 = dimFilter1.toFilter();
Filter filter1CNF = Filters.toCNF(filter1);
Filter filter1CNF = Filters.toCnf(filter1);
Assert.assertEquals(AndFilter.class, filter1CNF.getClass());
Assert.assertEquals(2, ((AndFilter) filter1CNF).getFilters().size());

View File

@ -25,7 +25,6 @@ import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ConciseBitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.bitmap.MutableBitmap;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.segment.IntIteratorUtils;
import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.testing.InitializedNullHandlingTest;
@ -52,215 +51,6 @@ public class FiltersTest extends InitializedNullHandlingTest
Assert.assertEquals(expected, estimated, 0.00001);
}
@Test
public void testPushDownNot()
{
final Filter filter = FilterTestUtils.not(
FilterTestUtils.and(
FilterTestUtils.selector("col1", "1"),
FilterTestUtils.selector("col2", "2"),
FilterTestUtils.not(FilterTestUtils.selector("col3", "3"))
)
);
final Filter expected = FilterTestUtils.or(
FilterTestUtils.not(FilterTestUtils.selector("col1", "1")),
FilterTestUtils.not(FilterTestUtils.selector("col2", "2")),
FilterTestUtils.selector("col3", "3")
);
Assert.assertEquals(expected, Filters.pushDownNot(filter));
}
@Test
public void testPushDownNotLeafNot()
{
final Filter filter = FilterTestUtils.and(
FilterTestUtils.selector("col1", "1"),
FilterTestUtils.selector("col2", "2"),
FilterTestUtils.not(FilterTestUtils.selector("col3", "3"))
);
Assert.assertEquals(filter, Filters.pushDownNot(filter));
}
@Test
public void testFlatten()
{
final Filter filter = FilterTestUtils.and(
FilterTestUtils.and(
FilterTestUtils.and(
FilterTestUtils.selector("col1", "1"),
FilterTestUtils.selector("col2", "2")
)
),
FilterTestUtils.selector("col3", "3")
);
final Filter expected = FilterTestUtils.and(
FilterTestUtils.selector("col1", "1"),
FilterTestUtils.selector("col2", "2"),
FilterTestUtils.selector("col3", "3")
);
Assert.assertEquals(expected, Filters.flatten(filter));
}
@Test
public void testFlattenUnflattenable()
{
final Filter filter = FilterTestUtils.and(
FilterTestUtils.or(
FilterTestUtils.selector("col1", "1"),
FilterTestUtils.selector("col2", "2")
),
FilterTestUtils.selector("col3", "3")
);
Assert.assertEquals(filter, Filters.flatten(filter));
}
@Test
public void testToCNFWithMuchReducibleFilter()
{
final Filter muchReducible = FilterTestUtils.and(
// should be flattened
FilterTestUtils.and(
FilterTestUtils.and(
FilterTestUtils.and(FilterTestUtils.selector("col1", "val1"))
)
),
// should be flattened
FilterTestUtils.and(
FilterTestUtils.or(
FilterTestUtils.and(FilterTestUtils.selector("col1", "val1"))
)
),
// should be flattened
FilterTestUtils.or(
FilterTestUtils.and(
FilterTestUtils.or(FilterTestUtils.selector("col1", "val1"))
)
),
// should eliminate duplicate filters
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val2"),
FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val2")
),
FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.and(
FilterTestUtils.selector("col2", "val2"),
FilterTestUtils.selector("col1", "val1")
)
)
);
final Filter expected = FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val2")
);
Assert.assertEquals(expected, Filters.toCNF(muchReducible));
}
@Test
public void testToCNFWithComplexFilterIncludingNotAndOr()
{
final Filter filter = FilterTestUtils.and(
FilterTestUtils.or(
FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val2")
),
FilterTestUtils.not(
FilterTestUtils.and(
FilterTestUtils.selector("col4", "val4"),
FilterTestUtils.selector("col5", "val5")
)
)
),
FilterTestUtils.or(
FilterTestUtils.not(
FilterTestUtils.or(
FilterTestUtils.selector("col2", "val2"),
FilterTestUtils.selector("col4", "val4"),
FilterTestUtils.selector("col5", "val5")
)
),
FilterTestUtils.and(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col3", "val3")
)
),
FilterTestUtils.and(
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val22"), // selecting different value
FilterTestUtils.selector("col3", "val3")
),
FilterTestUtils.not(
FilterTestUtils.selector("col1", "val11")
)
),
FilterTestUtils.and(
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val22"),
FilterTestUtils.selector("col3", "val3")
),
FilterTestUtils.not(
FilterTestUtils.selector("col1", "val11") // selecting different value
)
)
);
final Filter expected = FilterTestUtils.and(
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.selector("col2", "val22"),
FilterTestUtils.selector("col3", "val3")
),
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.not(FilterTestUtils.selector("col2", "val2"))
),
FilterTestUtils.or(
FilterTestUtils.not(FilterTestUtils.selector("col2", "val2")),
FilterTestUtils.selector("col3", "val3")
),
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4"))
),
FilterTestUtils.or(
FilterTestUtils.selector("col3", "val3"),
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4"))
),
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
),
FilterTestUtils.or(
FilterTestUtils.selector("col3", "val3"),
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
),
FilterTestUtils.not(FilterTestUtils.selector("col1", "val11")),
// The below OR filter could be eliminated because this filter also has
// (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)).
// The reduction process would be
// (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)) && (col1 = val1 || ~(col4 = val4) || ~(col5 = val5))
// => (col1 = val1 && ~(col4 = val4) || ~(col5 = val5)) && (col1 = val1 || ~(col4 = val4) || ~(col5 = val5))
// => (col1 = val1 && ~(col4 = val4) || ~(col5 = val5))
// => (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)).
// However, we don't have this reduction now, so we have a filter in a suboptimized CNF.
FilterTestUtils.or(
FilterTestUtils.selector("col1", "val1"),
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4")),
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
),
FilterTestUtils.or(
FilterTestUtils.selector("col2", "val2"),
FilterTestUtils.not(FilterTestUtils.selector("col4", "val4")),
FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
)
);
Assert.assertEquals(expected, Filters.toCNF(filter));
}
private static BitmapIndex getBitmapIndex(final List<ImmutableBitmap> bitmapList)
{
return new BitmapIndex()