From e677c62484bf02379ff8cb7855de1e3e43a4b4b1 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 16 Apr 2020 22:12:20 -0700 Subject: [PATCH] document useFilterCNF query context parameter (#9647) * document useFilterCNF query context parameter * move context key to QueryContexts * Update .spelling --- docs/querying/query-context.md | 1 + .../org/apache/druid/query/QueryContexts.java | 6 ++- .../apache/druid/segment/filter/Filters.java | 4 +- ...BaseHashJoinSegmentStorageAdapterTest.java | 2 +- .../HashJoinSegmentStorageAdapterTest.java | 46 +++++++++---------- .../segment/join/HashJoinSegmentTest.java | 4 +- .../segment/join/JoinFilterAnalyzerTest.java | 8 ++-- .../druid/segment/join/JoinablesTest.java | 6 +-- website/.spelling | 4 ++ 9 files changed, 44 insertions(+), 37 deletions(-) diff --git a/docs/querying/query-context.md b/docs/querying/query-context.md index df4dc84d70b..e497bd97bfa 100644 --- a/docs/querying/query-context.md +++ b/docs/querying/query-context.md @@ -54,6 +54,7 @@ These parameters apply to all query types. |parallelMergeParallelism|`druid.processing.merge.pool.parallelism`|Maximum number of parallel threads to use for parallel result merging on the Broker. See [Broker configuration](../configuration/index.html#broker) for more details.| |parallelMergeInitialYieldRows|`druid.processing.merge.task.initialYieldNumRows`|Number of rows to yield per ForkJoinPool merge task for parallel result merging on the Broker, before forking off a new task to continue merging sequences. See [Broker configuration](../configuration/index.html#broker) for more details.| |parallelMergeSmallBatchRows|`druid.processing.merge.task.smallBatchNumRows`|Size of result batches to operate on in ForkJoinPool merge tasks for parallel result merging on the Broker. See [Broker configuration](../configuration/index.html#broker) for more details.| +|useFilterCNF|`false`| If true, Druid will attempt to convert the query filter to Conjunctive Normal Form (CNF). 
During query processing, columns can be pre-filtered by intersecting the bitmap indexes of all values that match the eligible filters, often greatly reducing the raw number of rows which need to be scanned. But this effect only happens for the top level filter, or individual clauses of a top level 'and' filter. As such, filters in CNF potentially have a higher chance to utilize a large number of bitmap indexes on string columns during pre-filtering. However, this setting should be used with great caution, as it can sometimes have a negative effect on performance, and in some cases, the act of computing the CNF of a filter can be expensive. We recommend hand tuning your filters to produce an optimal form if possible, or at least verifying through experimentation that using this parameter actually improves your query performance with no ill effects.| ## Query-type-specific parameters diff --git a/processing/src/main/java/org/apache/druid/query/QueryContexts.java b/processing/src/main/java/org/apache/druid/query/QueryContexts.java index ab3c02c353e..b5a7be0b7dd 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryContexts.java +++ b/processing/src/main/java/org/apache/druid/query/QueryContexts.java @@ -52,6 +52,7 @@ public class QueryContexts public static final String JOIN_FILTER_REWRITE_ENABLE_KEY = "enableJoinFilterRewrite"; public static final String JOIN_FILTER_REWRITE_VALUE_COLUMN_FILTERS_ENABLE_KEY = "enableJoinFilterRewriteValueColumnFilters"; public static final String JOIN_FILTER_REWRITE_MAX_SIZE_KEY = "joinFilterRewriteMaxSize"; + public static final String USE_FILTER_CNF_KEY = "useFilterCNF"; public static final boolean DEFAULT_BY_SEGMENT = false; public static final boolean DEFAULT_POPULATE_CACHE = true; @@ -67,7 +68,8 @@ public class QueryContexts public static final boolean DEFAULT_ENABLE_JOIN_FILTER_PUSH_DOWN = true; public static final boolean DEFAULT_ENABLE_JOIN_FILTER_REWRITE = true; public static final boolean 
DEFAULT_ENABLE_JOIN_FILTER_REWRITE_VALUE_COLUMN_FILTERS = false; - public static final long DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY = 10000; + public static final long DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE = 10000; + public static final boolean DEFAULT_USE_FILTER_CNF = false; @SuppressWarnings("unused") // Used by Jackson serialization public enum Vectorize @@ -249,7 +251,7 @@ public class QueryContexts public static long getJoinFilterRewriteMaxSize(Query query) { - return parseLong(query, JOIN_FILTER_REWRITE_MAX_SIZE_KEY, DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY); + return parseLong(query, JOIN_FILTER_REWRITE_MAX_SIZE_KEY, DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE); } public static boolean getEnableJoinFilterPushDown(Query query) diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index 830f3727fd2..72e343ce93f 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -28,6 +28,7 @@ import it.unimi.dsi.fastutil.ints.IntList; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.Query; +import org.apache.druid.query.QueryContexts; import org.apache.druid.query.filter.BitmapIndexSelector; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.DruidPredicateFactory; @@ -59,7 +60,6 @@ import java.util.stream.Collectors; */ public class Filters { - private static final String CTX_KEY_USE_FILTER_CNF = "useFilterCNF"; /** * Convert a list of DimFilters to a list of Filters. 
@@ -423,7 +423,7 @@ public class Filters if (filter == null) { return null; } - boolean useCNF = query.getContextBoolean(CTX_KEY_USE_FILTER_CNF, false); + boolean useCNF = query.getContextBoolean(QueryContexts.USE_FILTER_CNF_KEY, QueryContexts.DEFAULT_USE_FILTER_CNF); return useCNF ? Filters.toCnf(filter) : filter; } diff --git a/processing/src/test/java/org/apache/druid/segment/join/BaseHashJoinSegmentStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/join/BaseHashJoinSegmentStorageAdapterTest.java index 9903269561a..a01a34b9c23 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/BaseHashJoinSegmentStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/BaseHashJoinSegmentStorageAdapterTest.java @@ -193,7 +193,7 @@ public class BaseHashJoinSegmentStorageAdapterTest true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); return new HashJoinSegmentStorageAdapter( diff --git a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java index b8f30ffbd69..a9b02e24a12 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java @@ -307,7 +307,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -375,7 +375,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); 
JoinTestHelper.verifyCursors( @@ -437,7 +437,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -501,7 +501,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -571,7 +571,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -637,7 +637,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -678,7 +678,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -721,7 +721,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -769,7 +769,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -817,7 +817,7 @@ public class 
HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -864,7 +864,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -924,7 +924,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -990,7 +990,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1069,7 +1069,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( new HashJoinSegmentStorageAdapter( @@ -1136,7 +1136,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1187,7 +1187,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1255,7 +1255,7 @@ public class 
HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1315,7 +1315,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1373,7 +1373,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1433,7 +1433,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1493,7 +1493,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.readCursors( @@ -1539,7 +1539,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.readCursors( @@ -1572,7 +1572,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( diff --git a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentTest.java 
b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentTest.java index 4df728fc75a..40a4863e959 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentTest.java @@ -88,7 +88,7 @@ public class HashJoinSegmentTest true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); hashJoinSegment = new HashJoinSegment( @@ -113,7 +113,7 @@ public class HashJoinSegmentTest true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); final HashJoinSegment ignored = new HashJoinSegment( diff --git a/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java b/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java index eb3b6fc64a8..ae97b7ea33b 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java @@ -471,7 +471,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); HashJoinSegmentStorageAdapter adapter = new HashJoinSegmentStorageAdapter( @@ -1476,7 +1476,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes false, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); HashJoinSegmentStorageAdapter adapter = new HashJoinSegmentStorageAdapter( factSegment.asStorageAdapter(), @@ -1548,7 +1548,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes true, false, true, - 
QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); HashJoinSegmentStorageAdapter adapter = new HashJoinSegmentStorageAdapter( factSegment.asStorageAdapter(), @@ -1752,7 +1752,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); } } diff --git a/processing/src/test/java/org/apache/druid/segment/join/JoinablesTest.java b/processing/src/test/java/org/apache/druid/segment/join/JoinablesTest.java index 002c6dd0bf8..4fa521dbcbc 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/JoinablesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/JoinablesTest.java @@ -102,7 +102,7 @@ public class JoinablesTest QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_PUSH_DOWN, QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE, QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_VALUE_COLUMN_FILTERS, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY, + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE, null, VirtualColumns.EMPTY ); @@ -131,7 +131,7 @@ public class JoinablesTest QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_PUSH_DOWN, QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE, QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_VALUE_COLUMN_FILTERS, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY, + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE, null, VirtualColumns.EMPTY ); @@ -168,7 +168,7 @@ public class JoinablesTest QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_PUSH_DOWN, QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE, QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_VALUE_COLUMN_FILTERS, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY, + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE, null, VirtualColumns.EMPTY ); diff --git 
a/website/.spelling b/website/.spelling index b0620471a34..531d0833b47 100644 --- a/website/.spelling +++ b/website/.spelling @@ -40,6 +40,7 @@ CORS CPUs CSVs Ceph +CNF ColumnDescriptor Corretto DDL @@ -307,6 +308,8 @@ pre-computation pre-compute pre-computing pre-configured +pre-filtered +pre-filtering pre-generated pre-made pre-processing @@ -380,6 +383,7 @@ unmergeable unmerged unparseable unparsed +useFilterCNF uptime uris useFieldDiscovery