Reading entire filter tree to address combinations of ANDs and ORs

This commit is contained in:
Soumyava Das 2022-02-06 20:03:42 -08:00
parent e2b4ae85ed
commit 81ca8f8496
2 changed files with 106 additions and 15 deletions

View File

@ -35,7 +35,6 @@ import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.UOE;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
@ -43,7 +42,9 @@ import org.apache.druid.math.expr.Evals;
import org.apache.druid.query.InlineDataSource;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryToolChest;
import org.apache.druid.query.filter.AndDimFilter;
import org.apache.druid.query.filter.BoundDimFilter;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.OrDimFilter;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.query.spec.QuerySegmentSpec;
@ -138,21 +139,18 @@ public class NativeQueryMaker implements QueryMaker
// and BoundFilter.match predicate eating up processing time which stalls a historical for a query with large number
// of numeric INs (> 10K). In such cases the user should change the query to specify the IN clause values as Strings:
// instead of IN(v1,v2,v3), the user should specify IN('v1','v2','v3').
if (numFilters != PlannerConfig.NUM_FILTER_NOT_USED) {
if (query.getFilter() instanceof OrDimFilter) {
OrDimFilter orDimFilter = (OrDimFilter) query.getFilter();
if (orDimFilter.getFields().size() > numFilters) {
String dimension = ((BoundDimFilter) (orDimFilter.getFields().get(0))).getDimension();
throw new UOE(StringUtils.format(
"The number of values in the IN clause for [%s] in query exceeds configured maxNumericFilter limit of [%s] for INs. Cast [%s] values of IN clause to String",
dimension,
numFilters,
orDimFilter.getFields().size()
));
}
}
}
int countOfFilters;
if (numFilters != PlannerConfig.NUM_FILTER_NOT_USED) {
countOfFilters = countFilters(query.getFilter());
if (countOfFilters > numFilters) {
throw new UOE(
"The number of values in the WHERE clause exceeds configured maxNumericFilter limit of [%s]. Cast [%s] values of WHERE clause to String",
numFilters,
countOfFilters
);
}
}
final List<String> rowOrder;
if (query instanceof TimeseriesQuery && !druidQuery.getGrouping().getDimensions().isEmpty()) {
@ -182,6 +180,23 @@ public class NativeQueryMaker implements QueryMaker
);
}
/**
 * Recursively counts the {@link BoundDimFilter} leaves reachable from {@code filter} through
 * {@link OrDimFilter} and {@link AndDimFilter} nodes.
 *
 * A bound filter counts as one; an OR or AND filter counts as the sum of its fields. Every other
 * input (including {@code null}) counts as zero and is not descended into.
 *
 * NOTE(review): bound filters nested inside any other composite filter type (for example a NOT
 * wrapper) are therefore not counted -- confirm this is the intended behavior.
 *
 * @param filter root of the filter tree to inspect
 * @return number of bound filters found under OR/AND nodes
 */
private int countFilters(DimFilter filter)
{
  // Leaf case: a single bound filter.
  if (filter instanceof BoundDimFilter) {
    return 1;
  }

  // OR and AND are walked the same way; only the cast needed to reach the children differs.
  final Iterable<? extends DimFilter> children;
  if (filter instanceof OrDimFilter) {
    children = ((OrDimFilter) filter).getFields();
  } else if (filter instanceof AndDimFilter) {
    children = ((AndDimFilter) filter).getFields();
  } else {
    // Anything else contributes nothing to the count.
    return 0;
  }

  int total = 0;
  for (DimFilter child : children) {
    total += countFilters(child);
  }
  return total;
}
private List<Interval> findBaseDataSourceIntervals(Query<?> query)
{
return DataSourceAnalysis.forDataSource(query.getDataSource())

View File

@ -6915,7 +6915,7 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
ImmutableMap.of(QueryContexts.MAX_NUMERIC_IN_FILTERS, 0),
"SELECT COUNT(*)\n"
+ "FROM druid.numfoo\n"
+ "WHERE dim6 IN (\n"
+ "WHERE dim1=1 OR dim6 IN (\n"
+ "1,2,3\n"
+ ")\n",
CalciteTests.REGULAR_USER_AUTH_RESULT,
@ -6948,7 +6948,7 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
public void testQueryWithMoreThanMaxNumericInFilter() throws Exception
{
expectedException.expect(UOE.class);
expectedException.expectMessage("The number of values in the IN clause for [dim6] in query exceeds configured maxNumericFilter limit of [2] for INs. Cast [3] values of IN clause to String");
expectedException.expectMessage("The number of values in the WHERE clause exceeds configured maxNumericFilter limit of [2]. Cast [3] values of WHERE clause to String");
testQuery(
PLANNER_CONFIG_MAX_NUMERIC_IN_FILTER,
@ -6964,6 +6964,82 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
);
}
/**
 * ANDs two equality predicates with a three-value numeric IN while the max-numeric-IN-filters
 * context value is 2, and expects a {@link UOE} whose message reports 5 values against a limit of 2.
 */
@Test
public void testQueryAndsOfOrsNumericInFilter() throws Exception
{
  final String sql = "SELECT COUNT(*)\n"
                     + "FROM druid.numfoo\n"
                     + "WHERE dim1=1 AND dim2=2 AND dim6 IN (\n"
                     + "1,2,3\n"
                     + ")\n";
  final String expectedMessage =
      "The number of values in the WHERE clause exceeds configured maxNumericFilter limit of [2]. Cast [5] values of WHERE clause to String";

  // Expectations must be registered before the query runs.
  expectedException.expect(UOE.class);
  expectedException.expectMessage(expectedMessage);

  testQuery(
      PLANNER_CONFIG_MAX_NUMERIC_IN_FILTER,
      ImmutableMap.of(QueryContexts.MAX_NUMERIC_IN_FILTERS, 2),
      sql,
      CalciteTests.REGULAR_USER_AUTH_RESULT,
      ImmutableList.of(),
      ImmutableList.of()
  );
}
/**
 * ANDs three numeric equality predicates while the max-numeric-IN-filters context value is 2,
 * and expects a {@link UOE} whose message reports 3 values against a limit of 2.
 */
@Test
public void testQueryOfAndsNumericInFilter() throws Exception
{
  final String sql = "SELECT COUNT(*)\n"
                     + "FROM druid.numfoo\n"
                     + "WHERE dim1=1 AND dim2=2 AND dim6=3";
  final String expectedMessage =
      "The number of values in the WHERE clause exceeds configured maxNumericFilter limit of [2]. Cast [3] values of WHERE clause to String";

  // Expectations must be registered before the query runs.
  expectedException.expect(UOE.class);
  expectedException.expectMessage(expectedMessage);

  testQuery(
      PLANNER_CONFIG_MAX_NUMERIC_IN_FILTER,
      ImmutableMap.of(QueryContexts.MAX_NUMERIC_IN_FILTERS, 2),
      sql,
      CalciteTests.REGULAR_USER_AUTH_RESULT,
      ImmutableList.of(),
      ImmutableList.of()
  );
}
/**
 * ORs two three-value numeric IN clauses while the max-numeric-IN-filters context value is 2,
 * and expects a {@link UOE} whose message reports 6 values against a limit of 2.
 */
@Test
public void testQueryOfOrsNumericInFilter() throws Exception
{
  final String sql = "SELECT COUNT(*)\n"
                     + "FROM druid.numfoo\n"
                     + "WHERE dim1 IN (1,2,3) OR dim6 IN (1,2,3)";
  final String expectedMessage =
      "The number of values in the WHERE clause exceeds configured maxNumericFilter limit of [2]. Cast [6] values of WHERE clause to String";

  // Expectations must be registered before the query runs.
  expectedException.expect(UOE.class);
  expectedException.expectMessage(expectedMessage);

  testQuery(
      PLANNER_CONFIG_MAX_NUMERIC_IN_FILTER,
      ImmutableMap.of(QueryContexts.MAX_NUMERIC_IN_FILTERS, 2),
      sql,
      CalciteTests.REGULAR_USER_AUTH_RESULT,
      ImmutableList.of(),
      ImmutableList.of()
  );
}
/**
 * Runs a three-value numeric NOT IN while the max-numeric-IN-filters context value is 2, and
 * expects a {@link UOE} whose message reports 3 values against a limit of 2.
 */
@Test
public void testQueryNOTWithNumericInFilter() throws Exception
{
  final String sql = "SELECT COUNT(*)\n"
                     + "FROM druid.numfoo\n"
                     + "WHERE dim6 NOT IN (\n"
                     + "1,2,3\n"
                     + ")\n";
  final String expectedMessage =
      "The number of values in the WHERE clause exceeds configured maxNumericFilter limit of [2]. Cast [3] values of WHERE clause to String";

  // Expectations must be registered before the query runs.
  expectedException.expect(UOE.class);
  expectedException.expectMessage(expectedMessage);

  testQuery(
      PLANNER_CONFIG_MAX_NUMERIC_IN_FILTER,
      ImmutableMap.of(QueryContexts.MAX_NUMERIC_IN_FILTERS, 2),
      sql,
      CalciteTests.REGULAR_USER_AUTH_RESULT,
      ImmutableList.of(),
      ImmutableList.of()
  );
}
@Test
public void testExplainExactCountDistinctOfSemiJoinResult() throws Exception