Convert inQueryThreshold into query context parameter. (#12357)

Added Calcites InQueryThreshold as a query context parameter. Setting this parameter appropriately reduces the time taken for queries with large number of values in their IN conditions.
This commit is contained in:
Adarsh Sanjeev 2022-03-22 18:33:57 +05:30 committed by GitHub
parent 1f0447e613
commit ef45a1551e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 117 additions and 1 deletions

View File

@ -63,6 +63,7 @@ Unless otherwise noted, the following parameters apply to all query types.
|enableJoinLeftTableScanDirect|`false`|This flag applies to queries which have joins. For joins, where left child is a simple scan with a filter, by default, druid will run the scan as a query and the join the results to the right child on broker. Setting this flag to true overrides that behavior and druid will attempt to push the join to data servers instead. Please note that the flag could be applicable to queries even if there is no explicit join. since queries can internally translated into a join by the SQL planner.|
|debug| `false` | Flag indicating whether to enable debugging outputs for the query. When set to false, no additional logs will be produced (logs produced will be entirely dependent on your logging level). When set to true, the following addition logs will be produced:<br />- Log the stack trace of the exception (if any) produced by the query |
|maxNumericInFilters|`-1`|Max limit for the amount of numeric values that can be compared for a string type dimension when the entire SQL WHERE clause of a query translates only to an [OR](../querying/filters.md#or) of [Bound filter](../querying/filters.md#bound-filter). By default, Druid does not restrict the amount of of numeric Bound Filters on String columns, although this situation may block other queries from running. Set this property to a smaller value to prevent Druid from running queries that have prohibitively long segment processing times. The optimal limit requires some trial and error; we recommend starting with 100. Users who submit a query that exceeds the limit of `maxNumericInFilters` should instead rewrite their queries to use strings in the `WHERE` clause instead of numbers. For example, `WHERE someString IN (123, 456)`. This value cannot exceed the set system configuration `druid.sql.planner.maxNumericInFilters`. This value is ignored if `druid.sql.planner.maxNumericInFilters` is not set explicitly.|
|inSubQueryThreshold|`2147483647`| Threshold for minimum number of values in an IN clause to convert the query to a JOIN operation on an inlined table rather than a predicate. A threshold of 0 forces usage of an inline table in all cases; a threshold of [Integer.MAX_VALUE] forces usage of OR in all cases. |
## Parameters by query type
Some query types offer context parameters specific to that query type.

View File

@ -69,6 +69,7 @@ public class QueryContexts
public static final String ENABLE_DEBUG = "debug";
public static final String BY_SEGMENT_KEY = "bySegment";
public static final String BROKER_SERVICE_NAME = "brokerService";
public static final String IN_SUB_QUERY_THRESHOLD_KEY = "inSubQueryThreshold";
public static final boolean DEFAULT_BY_SEGMENT = false;
public static final boolean DEFAULT_POPULATE_CACHE = true;
@ -91,6 +92,7 @@ public class QueryContexts
public static final boolean DEFAULT_USE_FILTER_CNF = false;
public static final boolean DEFAULT_SECONDARY_PARTITION_PRUNING = true;
public static final boolean DEFAULT_ENABLE_DEBUG = false;
public static final int DEFAULT_IN_SUB_QUERY_THRESHOLD = Integer.MAX_VALUE;
@SuppressWarnings("unused") // Used by Jackson serialization
public enum Vectorize
@ -335,6 +337,16 @@ public class QueryContexts
return parseBoolean(queryContext, ENABLE_DEBUG, DEFAULT_ENABLE_DEBUG);
}
public static int getInSubQueryThreshold(Map<String, Object> context)
{
return getInSubQueryThreshold(context, DEFAULT_IN_SUB_QUERY_THRESHOLD);
}
public static int getInSubQueryThreshold(Map<String, Object> context, int defaultValue)
{
return parseInt(context, IN_SUB_QUERY_THRESHOLD_KEY, defaultValue);
}
public static <T> Query<T> withMaxScatterGatherBytes(Query<T> query, long maxScatterGatherBytesLimit)
{
Object obj = query.getContextValue(MAX_SCATTER_GATHER_BYTES_KEY);
@ -441,6 +453,12 @@ public class QueryContexts
return val == null ? defaultValue : Numbers.parseInt(val);
}
static int parseInt(Map<String, Object> context, String key, int defaultValue)
{
final Object val = context.get(key);
return val == null ? defaultValue : Numbers.parseInt(val);
}
static <T> boolean parseBoolean(Query<T> query, String key, boolean defaultValue)
{
final Object val = query.getContextValue(key);

View File

@ -133,6 +133,13 @@ public class QueryContextsTest
Assert.assertTrue(QueryContexts.isSecondaryPartitionPruningEnabled(query));
}
@Test
public void testDefaultInSubQueryThreshold()
{
Assert.assertEquals(QueryContexts.DEFAULT_IN_SUB_QUERY_THRESHOLD,
QueryContexts.getInSubQueryThreshold(ImmutableMap.of()));
}
@Test
public void testGetEnableJoinLeftScanDirect()
{

View File

@ -38,6 +38,7 @@ import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.ValidationException;
import org.apache.druid.guice.annotations.Json;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.server.security.Access;
import org.apache.druid.server.security.AuthorizerMapper;
import org.apache.druid.server.security.NoopEscalator;
@ -150,7 +151,7 @@ public class PlannerFactory
.withExpand(false)
.withDecorrelationEnabled(false)
.withTrimUnusedFields(false)
.withInSubQueryThreshold(Integer.MAX_VALUE)
.withInSubQueryThreshold(QueryContexts.getInSubQueryThreshold(plannerContext.getQueryContext()))
.build();
return Frameworks
.newConfigBuilder()

View File

@ -4988,4 +4988,57 @@ public class CalciteJoinQueryTest extends BaseCalciteQueryTest
ImmutableList.of(new Object[]{4.0F, 4.0F})
);
}
@Test
public void testPlanWithInFilterMoreThanInSubQueryThreshold() throws Exception
{
String query = "SELECT l1 FROM numfoo WHERE l1 IN (4842, 4844, 4845, 14905, 4853, 29064)";
Map<String, Object> queryContext = new HashMap<>(QUERY_CONTEXT_DEFAULT);
queryContext.put(QueryContexts.IN_SUB_QUERY_THRESHOLD_KEY, 3);
testQuery(
PLANNER_CONFIG_DEFAULT,
queryContext,
DEFAULT_PARAMETERS,
query,
CalciteTests.REGULAR_USER_AUTH_RESULT,
ImmutableList.of(
Druids.newScanQueryBuilder()
.dataSource(
JoinDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
InlineDataSource.fromIterable(
ImmutableList.of(
new Object[]{4842L},
new Object[]{4844L},
new Object[]{4845L},
new Object[]{14905L},
new Object[]{4853L},
new Object[]{29064L}
),
RowSignature.builder()
.add("ROW_VALUE", ColumnType.LONG)
.build()
),
"j0.",
"(\"l1\" == \"j0.ROW_VALUE\")",
JoinType.INNER,
null,
ExprMacroTable.nil()
)
)
.columns("l1")
.intervals(querySegmentSpec(Filtration.eternity()))
.context(queryContext)
.legacy(false)
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.build()
),
(sql, result) -> {
// Ignore the results, only need to check that the type of query is a join.
},
null
);
}
}

View File

@ -13850,4 +13850,39 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
"Possible error: SQL requires 'UNION' but only 'UNION ALL' is supported."
);
}
@Test
public void testPlanWithInFilterLessThanInSubQueryThreshold() throws Exception
{
String query = "SELECT l1 FROM numfoo WHERE l1 IN (4842, 4844, 4845, 14905, 4853, 29064)";
testQuery(
PLANNER_CONFIG_DEFAULT,
QUERY_CONTEXT_DEFAULT,
DEFAULT_PARAMETERS,
query,
CalciteTests.REGULAR_USER_AUTH_RESULT,
ImmutableList.of(
Druids.newScanQueryBuilder()
.dataSource(CalciteTests.DATASOURCE3)
.columns("l1")
.intervals(querySegmentSpec(Filtration.eternity()))
.context(QUERY_CONTEXT_DEFAULT)
.legacy(false)
.filters(
in(
"l1",
ImmutableList.of("4842", "4844", "4845", "14905", "4853", "29064"),
null
)
)
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.build()
),
(sql, result) -> {
// Ignore the results, only need to check that the type of query is a filter.
},
null
);
}
}

View File

@ -294,6 +294,7 @@ influxdb
ingestionSpec
injective
inlined
inSubQueryThreshold
interruptible
jackson-jq
javadoc