Add DimFilter.toOptimizedFilter(), ensure that join filter pre-analysis operates on optimized filters (#10056)

* Ensure that join filter pre-analysis operates on optimized filters, add DimFilter.toOptimizedFilter

* Remove aggressive equality check that was used for testing

* Use Suppliers.memoize

* Checkstyle
This commit is contained in:
Jonathan Wei 2020-07-01 22:26:17 -07:00 committed by GitHub
parent 367eaedbb4
commit ed981ef88e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 170 additions and 66 deletions

View File

@ -38,7 +38,7 @@ import java.util.Set;
/**
*/
public class BloomDimFilter implements DimFilter
public class BloomDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private final String dimension;

View File

@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.filter;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
/**
* Base class for DimFilters that support optimization. This abstract class provides a default implementation of
* toOptimizedFilter that relies on the existing optimize() and toFilter() methods. It uses a memoized supplier.
*/
abstract class AbstractOptimizableDimFilter implements DimFilter
{
private final Supplier<Filter> cachedOptimizedFilter = Suppliers.memoize(
() -> optimize().toFilter()
);
@JsonIgnore
@Override
public Filter toOptimizedFilter()
{
return cachedOptimizedFilter.get();
}
}

View File

@ -37,7 +37,7 @@ import java.util.stream.Collectors;
/**
*/
public class AndDimFilter implements DimFilter
public class AndDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private static final Joiner AND_JOINER = Joiner.on(" && ");

View File

@ -47,7 +47,7 @@ import java.nio.ByteBuffer;
import java.util.Objects;
import java.util.Set;
public class BoundDimFilter implements DimFilter
public class BoundDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private final String dimension;
@Nullable

View File

@ -35,7 +35,7 @@ import java.util.stream.Collectors;
/**
*/
public class ColumnComparisonDimFilter implements DimFilter
public class ColumnComparisonDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private static final Joiner COMMA_JOINER = Joiner.on(", ");

View File

@ -58,6 +58,18 @@ public interface DimFilter extends Cacheable
*/
DimFilter optimize();
/**
* @return Return a Filter that implements this DimFilter, after applying optimizations to this DimFilter.
* A typical implementation will return the result of `optimize().toFilter()`
* See abstract base class {@link AbstractOptimizableDimFilter} for a common implementation shared by
* current DimFilters.
*
* The Filter returned by this method across multiple calls must be the same object: parts of the query stack
* compare Filters, and returning the same object allows these checks to avoid deep comparisons.
* (see {@link org.apache.druid.segment.join.HashJoinSegmentStorageAdapter#makeCursors for an example}
*/
Filter toOptimizedFilter();
/**
* Returns a Filter that implements this DimFilter. This does not generally involve optimizing the DimFilter,
* so it does make sense to optimize first and then call toFilter on the resulting DimFilter.

View File

@ -37,7 +37,7 @@ import javax.annotation.Nullable;
import java.util.Objects;
import java.util.Set;
public class ExpressionDimFilter implements DimFilter
public class ExpressionDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private final String expression;
private final Supplier<Expr> parsed;

View File

@ -34,7 +34,7 @@ import java.util.Set;
* This class is deprecated, use SelectorDimFilter instead: {@link SelectorDimFilter}
*/
@Deprecated
public class ExtractionDimFilter implements DimFilter
public class ExtractionDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private final String dimension;
private final String value;

View File

@ -29,7 +29,7 @@ import javax.annotation.Nullable;
import java.util.Collections;
import java.util.Set;
public class FalseDimFilter implements DimFilter
public class FalseDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private static final FalseDimFilter INSTANCE = new FalseDimFilter();
private static final byte[] CACHE_KEY = new CacheKeyBuilder(DimFilterUtils.FALSE_CACHE_ID).build();

View File

@ -61,7 +61,7 @@ import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
public class InDimFilter implements DimFilter
public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
// determined through benchmark that binary search on long[] is faster than HashSet until ~16 elements
// Hashing threshold is not applied to String for now, String still uses ImmutableSortedSet

View File

@ -42,7 +42,7 @@ import java.util.List;
import java.util.Objects;
import java.util.Set;
public class IntervalDimFilter implements DimFilter
public class IntervalDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private final List<Interval> intervals;
private final List<Pair<Long, Long>> intervalLongs;

View File

@ -43,7 +43,7 @@ import java.nio.ByteBuffer;
import java.util.Objects;
import java.util.Set;
public class JavaScriptDimFilter implements DimFilter
public class JavaScriptDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private final String dimension;
private final String function;

View File

@ -41,7 +41,7 @@ import java.util.Objects;
import java.util.Set;
import java.util.regex.Pattern;
public class LikeDimFilter implements DimFilter
public class LikeDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
// Regex matching characters that are definitely okay to include unescaped in a regex.
// Leads to excessively paranoid escaping, although shouldn't affect runtime beyond compiling the regex.

View File

@ -32,7 +32,7 @@ import java.util.Set;
/**
*/
public class NotDimFilter implements DimFilter
public class NotDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private final DimFilter field;

View File

@ -38,7 +38,7 @@ import java.util.stream.Collectors;
/**
*/
public class OrDimFilter implements DimFilter
public class OrDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private static final Joiner OR_JOINER = Joiner.on(" || ");

View File

@ -38,7 +38,7 @@ import java.util.regex.Pattern;
/**
*/
public class RegexDimFilter implements DimFilter
public class RegexDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private final String dimension;
private final String pattern;

View File

@ -37,7 +37,7 @@ import java.util.Set;
/**
*/
public class SearchQueryDimFilter implements DimFilter
public class SearchQueryDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private final String dimension;
private final SearchQuerySpec query;

View File

@ -41,7 +41,7 @@ import java.util.Set;
/**
*
*/
public class SelectorDimFilter implements DimFilter
public class SelectorDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private final String dimension;

View File

@ -37,7 +37,7 @@ import java.util.Set;
/**
*/
public class SpatialDimFilter implements DimFilter
public class SpatialDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private final String dimension;
private final Bound bound;

View File

@ -27,7 +27,7 @@ import org.apache.druid.segment.filter.TrueFilter;
import java.util.Collections;
import java.util.Set;
public class TrueDimFilter implements DimFilter
public class TrueDimFilter extends AbstractOptimizableDimFilter implements DimFilter
{
private static final TrueDimFilter INSTANCE = new TrueDimFilter();
private static final byte[] CACHE_KEY = new CacheKeyBuilder(DimFilterUtils.TRUE_CACHE_ID).build();

View File

@ -472,13 +472,9 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
public Sequence<ResultRow> run(QueryPlus<ResultRow> queryPlus, ResponseContext responseContext)
{
GroupByQuery groupByQuery = (GroupByQuery) queryPlus.getQuery();
if (groupByQuery.getDimFilter() != null) {
groupByQuery = groupByQuery.withDimFilter(groupByQuery.getDimFilter().optimize());
}
final GroupByQuery delegateGroupByQuery = groupByQuery;
final List<DimensionSpec> dimensionSpecs = new ArrayList<>();
final BitSet optimizedDimensions = extractionsToRewrite(delegateGroupByQuery);
final List<DimensionSpec> dimensions = delegateGroupByQuery.getDimensions();
final BitSet optimizedDimensions = extractionsToRewrite(groupByQuery);
final List<DimensionSpec> dimensions = groupByQuery.getDimensions();
for (int i = 0; i < dimensions.size(); i++) {
final DimensionSpec dimensionSpec = dimensions.get(i);
if (optimizedDimensions.get(i)) {
@ -491,7 +487,7 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
}
return runner.run(
queryPlus.withQuery(delegateGroupByQuery.withDimensionSpecs(dimensionSpecs)),
queryPlus.withQuery(groupByQuery.withDimensionSpecs(dimensionSpecs)),
responseContext
);
}

View File

@ -294,11 +294,6 @@ public class ScanQuery extends BaseQuery<ScanResultValue>
return Druids.ScanQueryBuilder.copy(this).context(computeOverriddenContext(getContext(), contextOverrides)).build();
}
public ScanQuery withDimFilter(DimFilter dimFilter)
{
return Druids.ScanQueryBuilder.copy(this).filters(dimFilter).build();
}
@Override
public boolean equals(final Object o)
{

View File

@ -118,11 +118,6 @@ public class ScanQueryQueryToolChest extends QueryToolChest<ScanResultValue, Sca
public QueryRunner<ScanResultValue> preMergeQueryDecoration(final QueryRunner<ScanResultValue> runner)
{
return (queryPlus, responseContext) -> {
ScanQuery scanQuery = (ScanQuery) queryPlus.getQuery();
if (scanQuery.getFilter() != null) {
scanQuery = scanQuery.withDimFilter(scanQuery.getFilter().optimize());
queryPlus = queryPlus.withQuery(scanQuery);
}
return runner.run(queryPlus, responseContext);
};
}

View File

@ -109,11 +109,6 @@ public class SearchQuery extends BaseQuery<Result<SearchResultValue>>
return Druids.SearchQueryBuilder.copy(this).context(newContext).build();
}
public SearchQuery withDimFilter(DimFilter dimFilter)
{
return Druids.SearchQueryBuilder.copy(this).filters(dimFilter).build();
}
@JsonProperty("filter")
public DimFilter getDimensionsFilter()
{

View File

@ -322,11 +322,6 @@ public class SearchQueryQueryToolChest extends QueryToolChest<Result<SearchResul
{
return new SearchThresholdAdjustingQueryRunner(
(queryPlus, responseContext) -> {
SearchQuery searchQuery = (SearchQuery) queryPlus.getQuery();
if (searchQuery.getDimensionsFilter() != null) {
searchQuery = searchQuery.withDimFilter(searchQuery.getDimensionsFilter().optimize());
queryPlus = queryPlus.withQuery(searchQuery);
}
return runner.run(queryPlus, responseContext);
},
config

View File

@ -377,11 +377,6 @@ public class TimeseriesQueryQueryToolChest extends QueryToolChest<Result<Timeser
public QueryRunner<Result<TimeseriesResultValue>> preMergeQueryDecoration(final QueryRunner<Result<TimeseriesResultValue>> runner)
{
return (queryPlus, responseContext) -> {
TimeseriesQuery timeseriesQuery = (TimeseriesQuery) queryPlus.getQuery();
if (timeseriesQuery.getDimensionsFilter() != null) {
timeseriesQuery = timeseriesQuery.withDimFilter(timeseriesQuery.getDimensionsFilter().optimize());
queryPlus = queryPlus.withQuery(timeseriesQuery);
}
return runner.run(queryPlus, responseContext);
};
}

View File

@ -203,11 +203,6 @@ public class TopNQuery extends BaseQuery<Result<TopNResultValue>>
return new TopNQueryBuilder(this).context(computeOverriddenContext(getContext(), contextOverrides)).build();
}
public TopNQuery withDimFilter(DimFilter dimFilter)
{
return new TopNQueryBuilder(this).filters(dimFilter).build();
}
@Override
public String toString()
{

View File

@ -425,14 +425,10 @@ public class TopNQueryQueryToolChest extends QueryToolChest<Result<TopNResultVal
{
return (queryPlus, responseContext) -> {
TopNQuery topNQuery = (TopNQuery) queryPlus.getQuery();
if (topNQuery.getDimensionsFilter() != null) {
topNQuery = topNQuery.withDimFilter(topNQuery.getDimensionsFilter().optimize());
}
final TopNQuery delegateTopNQuery = topNQuery;
if (TopNQueryEngine.canApplyExtractionInPost(delegateTopNQuery)) {
final DimensionSpec dimensionSpec = delegateTopNQuery.getDimensionSpec();
if (TopNQueryEngine.canApplyExtractionInPost(topNQuery)) {
final DimensionSpec dimensionSpec = topNQuery.getDimensionSpec();
QueryPlus<Result<TopNResultValue>> delegateQueryPlus = queryPlus.withQuery(
delegateTopNQuery.withDimensionSpec(
topNQuery.withDimensionSpec(
new DefaultDimensionSpec(
dimensionSpec.getDimension(),
dimensionSpec.getOutputName()
@ -441,7 +437,7 @@ public class TopNQueryQueryToolChest extends QueryToolChest<Result<TopNResultVal
);
return runner.run(delegateQueryPlus, responseContext);
} else {
return runner.run(queryPlus.withQuery(delegateTopNQuery), responseContext);
return runner.run(queryPlus.withQuery(topNQuery), responseContext);
}
};
}

View File

@ -86,7 +86,7 @@ public class Filters
@Nullable
public static Filter toFilter(@Nullable DimFilter dimFilter)
{
return dimFilter == null ? null : dimFilter.toFilter();
return dimFilter == null ? null : dimFilter.toOptimizedFilter();
}
/**

View File

@ -42,6 +42,9 @@ public class FalseDimFilterTest
@Test
public void testEquals()
{
EqualsVerifier.forClass(FalseDimFilter.class).usingGetClass().verify();
EqualsVerifier.forClass(FalseDimFilter.class)
.usingGetClass()
.withIgnoredFields("cachedOptimizedFilter")
.verify();
}
}

View File

@ -42,6 +42,9 @@ public class TrueDimFilterTest
@Test
public void testEquals()
{
EqualsVerifier.forClass(TrueDimFilter.class).usingGetClass().verify();
EqualsVerifier.forClass(TrueDimFilter.class)
.usingGetClass()
.withIgnoredFields("cachedOptimizedFilter")
.verify();
}
}

View File

@ -36,10 +36,12 @@ import org.apache.druid.java.util.common.JodaUtils;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.Druids;
import org.apache.druid.query.GlobalTableDataSource;
import org.apache.druid.query.LookupDataSource;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.QueryException;
@ -85,6 +87,7 @@ import org.apache.druid.query.filter.NotDimFilter;
import org.apache.druid.query.filter.RegexDimFilter;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
import org.apache.druid.query.groupby.orderby.OrderByColumnSpec.Direction;
@ -97,6 +100,8 @@ import org.apache.druid.query.topn.NumericTopNMetricSpec;
import org.apache.druid.query.topn.TopNQueryBuilder;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.join.JoinType;
import org.apache.druid.server.QueryLifecycle;
import org.apache.druid.server.QueryLifecycleFactory;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.planner.Calcites;
@ -12003,6 +12008,83 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
);
}
@Test
@Parameters(source = QueryContextForJoinProvider.class)
public void testGroupByJoinAsNativeQueryWithUnoptimizedFilter(Map<String, Object> queryContext)
{
// The query below is the same as the inner groupBy on a join datasource from the test
// testNestedGroupByOnInlineDataSourceWithFilter, except that the selector filter
// dim1=def has been rewritten into an unoptimized filter, dim1 IN (def).
//
// The unoptimized filter will be optimized into dim1=def by the query toolchests in their
// pre-merge decoration function, when it calls DimFilter.optimize().
//
// This test's goal is to ensure that the join filter rewrites function correctly when there are
// unoptimized filters in the join query. The rewrite logic must apply to the optimized form of the filters,
// as this is what will be passed to HashJoinSegmentAdapter.makeCursors(), where the result of the join
// filter pre-analysis is used.
//
// A native query is used because the filter types where we support optimization are the AND/OR/NOT and
// IN filters. However, when expressed in a SQL query, our SQL planning layer is smart enough to already apply
// these optimizations in the native query it generates, making it impossible to test the unoptimized filter forms
// using SQL queries.
//
// The test method is placed here for convenience as this class provides the necessary setup.
Query query = GroupByQuery
.builder()
.setDataSource(
join(
new QueryDataSource(
newScanQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Intervals.of("2001-01-02T00:00:00.000Z/146140482-04-24T15:36:27.903Z")))
.columns("dim1")
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.context(queryContext)
.build()
),
new QueryDataSource(
newScanQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Intervals.of("2001-01-02T00:00:00.000Z/146140482-04-24T15:36:27.903Z")))
.columns("dim1", "m2")
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.context(queryContext)
.build()
),
"j0.",
equalsCondition(
DruidExpression.fromColumn("dim1"),
DruidExpression.fromColumn("j0.dim1")
),
JoinType.INNER
)
)
.setGranularity(Granularities.ALL)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setDimFilter(in("dim1", Collections.singletonList("def"), null)) // provide an unoptimized IN filter
.setDimensions(
dimensions(
new DefaultDimensionSpec("v0", "d0")
)
)
.setVirtualColumns(expressionVirtualColumn("v0", "'def'", ValueType.STRING))
.build();
QueryLifecycleFactory qlf = CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate);
QueryLifecycle ql = qlf.factorize();
Sequence seq = ql.runSimple(
query,
CalciteTests.SUPER_USER_AUTH_RESULT,
null
);
List<Object> results = seq.toList();
Assert.assertEquals(
ImmutableList.of(ResultRow.of("def")),
results
);
}
@Test
public void testUsingSubqueryAsFilterOnTwoColumns() throws Exception
{