diff --git a/docs/querying/sql-query-context.md b/docs/querying/sql-query-context.md index 59b2f74a958..bc1fdf7a07d 100644 --- a/docs/querying/sql-query-context.md +++ b/docs/querying/sql-query-context.md @@ -41,6 +41,7 @@ Configure Druid SQL query planning using the parameters in the table below. |`useApproximateCountDistinct`|Whether to use an approximate cardinality algorithm for `COUNT(DISTINCT foo)`.|druid.sql.planner.useApproximateCountDistinct on the Broker (default: true)| |`useGroupingSetForExactDistinct`|Whether to use grouping sets to execute queries with multiple exact distinct aggregations.|druid.sql.planner.useGroupingSetForExactDistinct on the Broker (default: false)| |`useApproximateTopN`|Whether to use approximate [TopN queries](topnquery.md) when a SQL query could be expressed as such. If false, exact [GroupBy queries](groupbyquery.md) will be used instead.|druid.sql.planner.useApproximateTopN on the Broker (default: true)| +|`enableTimeBoundaryPlanning`|If true, SQL queries will get converted to TimeBoundary queries wherever possible. TimeBoundary queries are very efficient for min-max calculation on __time column in a datasource |druid.query.default.context.enableTimeBoundaryPlanning on the Broker (default: false)| ## Setting the query context The query context parameters can be specified as a "context" object in the [JSON API](sql-api.md) or as a [JDBC connection properties object](sql-jdbc.md). diff --git a/processing/src/main/java/org/apache/druid/query/QueryContexts.java b/processing/src/main/java/org/apache/druid/query/QueryContexts.java index 5768c6a0670..a4defa7b08e 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryContexts.java +++ b/processing/src/main/java/org/apache/druid/query/QueryContexts.java @@ -70,6 +70,7 @@ public class QueryContexts public static final String BY_SEGMENT_KEY = "bySegment"; public static final String BROKER_SERVICE_NAME = "brokerService"; public static final String IN_SUB_QUERY_THRESHOLD_KEY = "inSubQueryThreshold"; + public static final String TIME_BOUNDARY_PLANNING_KEY = "enableTimeBoundaryPlanning"; public static final boolean DEFAULT_BY_SEGMENT = false; public static final boolean DEFAULT_POPULATE_CACHE = true; @@ -93,6 +94,7 @@ public class QueryContexts public static final boolean DEFAULT_SECONDARY_PARTITION_PRUNING = true; public static final boolean DEFAULT_ENABLE_DEBUG = false; public static final int DEFAULT_IN_SUB_QUERY_THRESHOLD = Integer.MAX_VALUE; + public static final boolean DEFAULT_ENABLE_TIME_BOUNDARY_PLANNING = false; @SuppressWarnings("unused") // Used by Jackson serialization public enum Vectorize @@ -347,6 +349,11 @@ public class QueryContexts return parseInt(context, IN_SUB_QUERY_THRESHOLD_KEY, defaultValue); } + public static boolean isTimeBoundaryPlanningEnabled(Map queryContext) + { + return parseBoolean(queryContext, TIME_BOUNDARY_PLANNING_KEY, DEFAULT_ENABLE_TIME_BOUNDARY_PLANNING); + } + public static Query withMaxScatterGatherBytes(Query query, long maxScatterGatherBytesLimit) { Object obj = query.getContextValue(MAX_SCATTER_GATHER_BYTES_KEY); diff --git a/processing/src/test/java/org/apache/druid/query/QueryContextsTest.java b/processing/src/test/java/org/apache/druid/query/QueryContextsTest.java index 7cf1f94efb3..c9f0c8b482a 100644 --- a/processing/src/test/java/org/apache/druid/query/QueryContextsTest.java +++ b/processing/src/test/java/org/apache/druid/query/QueryContextsTest.java @@ -140,6 +140,13 @@ public class QueryContextsTest QueryContexts.getInSubQueryThreshold(ImmutableMap.of())); } + @Test + public void testDefaultPlanTimeBoundarySql() + { + Assert.assertEquals(QueryContexts.DEFAULT_ENABLE_TIME_BOUNDARY_PLANNING, + QueryContexts.isTimeBoundaryPlanningEnabled(ImmutableMap.of())); + } + @Test public void testGetEnableJoinLeftScanDirect() { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java index f9f5083f17b..08aa08ed87b 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java @@ -804,7 +804,7 @@ public class DruidQuery } } - final TimeBoundaryQuery timeBoundaryQuery = toTimeBoundaryQuery(); + final TimeBoundaryQuery timeBoundaryQuery = toTimeBoundaryQuery(queryFeatureInspector); if (timeBoundaryQuery != null) { return timeBoundaryQuery; } @@ -838,9 +838,10 @@ public class DruidQuery * @return a TimeBoundaryQuery if possible. null if it is not possible to construct one. */ @Nullable - private TimeBoundaryQuery toTimeBoundaryQuery() + private TimeBoundaryQuery toTimeBoundaryQuery(QueryFeatureInspector queryFeatureInspector) { - if (grouping == null + if (!queryFeatureInspector.feature(QueryFeature.CAN_RUN_TIME_BOUNDARY) + || grouping == null || grouping.getSubtotals().hasEffect(grouping.getDimensionSpecs()) || grouping.getHavingFilter() != null || selectProjection != null) { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeQueryMaker.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeQueryMaker.java index a74a68b518a..3e3dc0b15ef 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeQueryMaker.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeQueryMaker.java @@ -42,6 +42,7 @@ import org.apache.druid.java.util.common.guava.Sequences; import org.apache.druid.math.expr.Evals; import org.apache.druid.query.InlineDataSource; import org.apache.druid.query.Query; +import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryToolChest; import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.DimFilter; @@ -112,6 +113,8 @@ public class NativeQueryMaker implements QueryMaker case CAN_READ_EXTERNAL_DATA: case SCAN_CAN_ORDER_BY_NON_TIME: return false; + case CAN_RUN_TIME_BOUNDARY: + return QueryContexts.isTimeBoundaryPlanningEnabled(plannerContext.getQueryContext().getMergedParams()); default: throw new IAE("Unrecognized feature: %s", feature); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/QueryFeature.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/QueryFeature.java index 7a1cdee319e..363837435da 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/QueryFeature.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/QueryFeature.java @@ -46,4 +46,9 @@ public enum QueryFeature * other than the "__time" column. */ SCAN_CAN_ORDER_BY_NON_TIME, + + /** + * Queries of type {@link org.apache.druid.query.timeboundary.TimeBoundaryQuery} are usable. + */ + CAN_RUN_TIME_BOUNDARY } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java index 1c4227102f4..aad48cff19c 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java @@ -2388,11 +2388,13 @@ public class CalciteJoinQueryTest extends BaseCalciteQueryTest cannotVectorize(); } + Map updatedQueryContext = new HashMap<>(queryContext); + updatedQueryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true); Map maxTimeQueryContext = new HashMap<>(queryContext); maxTimeQueryContext.put(TimeBoundaryQuery.MAX_TIME_ARRAY_OUTPUT_NAME, "a0"); testQuery( "SELECT DISTINCT __time FROM druid.foo WHERE __time IN (SELECT MAX(__time) FROM druid.foo)", - queryContext, + updatedQueryContext, ImmutableList.of( GroupByQuery.builder() .setDataSource( @@ -2434,11 +2436,13 @@ public class CalciteJoinQueryTest extends BaseCalciteQueryTest // Cannot vectorize JOIN operator. cannotVectorize(); + Map updatedQueryContext = new HashMap<>(queryContext); + updatedQueryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true); Map maxTimeQueryContext = new HashMap<>(queryContext); maxTimeQueryContext.put(TimeBoundaryQuery.MAX_TIME_ARRAY_OUTPUT_NAME, "a0"); testQuery( "SELECT DISTINCT __time FROM druid.foo WHERE __time NOT IN (SELECT MAX(__time) FROM druid.foo)", - queryContext, + updatedQueryContext, ImmutableList.of( GroupByQuery.builder() .setDataSource( @@ -3568,6 +3572,8 @@ public class CalciteJoinQueryTest extends BaseCalciteQueryTest cannotVectorize(); } + Map updatedQueryContext = new HashMap<>(queryContext); + updatedQueryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true); Map maxTimeQueryContext = new HashMap<>(queryContext); maxTimeQueryContext.put(TimeBoundaryQuery.MAX_TIME_ARRAY_OUTPUT_NAME, "a0"); testQuery( @@ -3576,7 +3582,7 @@ public class CalciteJoinQueryTest extends BaseCalciteQueryTest + "AND __time IN (SELECT MAX(__time) FROM foo WHERE cnt = 1)\n" + "AND __time IN (SELECT MAX(__time) FROM foo WHERE cnt <> 2)\n" + "GROUP BY 1", - queryContext, + updatedQueryContext, ImmutableList.of( GroupByQuery.builder() .setDataSource( @@ -3628,6 +3634,8 @@ public class CalciteJoinQueryTest extends BaseCalciteQueryTest { cannotVectorize(); + Map updatedQueryContext = new HashMap<>(queryContext); + updatedQueryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true); Map minTimeQueryContext = new HashMap<>(queryContext); minTimeQueryContext.put(TimeBoundaryQuery.MIN_TIME_ARRAY_OUTPUT_NAME, "a0"); Map maxTimeQueryContext = new HashMap<>(queryContext); @@ -3638,7 +3646,7 @@ public class CalciteJoinQueryTest extends BaseCalciteQueryTest + "AND __time IN (SELECT MAX(__time) FROM foo)\n" + "AND __time NOT IN (SELECT MIN(__time) FROM foo)\n" + "GROUP BY 1", - queryContext, + updatedQueryContext, ImmutableList.of( GroupByQuery.builder() .setDataSource( @@ -3732,6 +3740,8 @@ public class CalciteJoinQueryTest extends BaseCalciteQueryTest { cannotVectorize(); + Map updatedQueryContext = new HashMap<>(queryContext); + updatedQueryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true); Map minTimeQueryContext = new HashMap<>(queryContext); minTimeQueryContext.put(TimeBoundaryQuery.MIN_TIME_ARRAY_OUTPUT_NAME, "a0"); Map maxTimeQueryContext = new HashMap<>(queryContext); @@ -3743,7 +3753,7 @@ public class CalciteJoinQueryTest extends BaseCalciteQueryTest + "LEFT JOIN (SELECT MIN(__time) t FROM foo) t1 on t1.t = foo.__time\n" + "WHERE dim1 IN ('abc', 'def') AND t1.t is null\n" + "GROUP BY 1", - queryContext, + updatedQueryContext, ImmutableList.of( GroupByQuery.builder() .setDataSource( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteTimeBoundaryQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteTimeBoundaryQueryTest.java index 74e42c53654..6ed157d349d 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteTimeBoundaryQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteTimeBoundaryQueryTest.java @@ -23,6 +23,7 @@ import com.google.common.collect.ImmutableList; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.query.Druids; +import org.apache.druid.query.QueryContexts; import org.apache.druid.query.aggregation.LongMaxAggregatorFactory; import org.apache.druid.query.aggregation.LongMinAggregatorFactory; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; @@ -38,15 +39,18 @@ public class CalciteTimeBoundaryQueryTest extends BaseCalciteQueryTest @Test public void testMaxTimeQuery() throws Exception { - HashMap context = new HashMap<>(QUERY_CONTEXT_DEFAULT); - context.put(TimeBoundaryQuery.MAX_TIME_ARRAY_OUTPUT_NAME, "a0"); + HashMap queryContext = new HashMap<>(QUERY_CONTEXT_DEFAULT); + queryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true); + HashMap expectedContext = new HashMap<>(QUERY_CONTEXT_DEFAULT); + expectedContext.put(TimeBoundaryQuery.MAX_TIME_ARRAY_OUTPUT_NAME, "a0"); testQuery( "SELECT MAX(__time) AS maxTime FROM foo", + queryContext, ImmutableList.of( Druids.newTimeBoundaryQueryBuilder() .dataSource("foo") .bound(TimeBoundaryQuery.MAX_TIME) - .context(context) + .context(expectedContext) .build() ), ImmutableList.of(new Object[]{DateTimes.of("2001-01-03").getMillis()}) @@ -56,15 +60,18 @@ public class CalciteTimeBoundaryQueryTest extends BaseCalciteQueryTest @Test public void testMinTimeQuery() throws Exception { - HashMap context = new HashMap<>(QUERY_CONTEXT_DEFAULT); - context.put(TimeBoundaryQuery.MIN_TIME_ARRAY_OUTPUT_NAME, "a0"); + HashMap queryContext = new HashMap<>(QUERY_CONTEXT_DEFAULT); + queryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true); + HashMap expectedContext = new HashMap<>(QUERY_CONTEXT_DEFAULT); + expectedContext.put(TimeBoundaryQuery.MIN_TIME_ARRAY_OUTPUT_NAME, "a0"); testQuery( "SELECT MIN(__time) AS minTime FROM foo", + queryContext, ImmutableList.of( Druids.newTimeBoundaryQueryBuilder() .dataSource("foo") .bound(TimeBoundaryQuery.MIN_TIME) - .context(context) + .context(expectedContext) .build() ), ImmutableList.of(new Object[]{DateTimes.of("2000-01-01").getMillis()}) @@ -74,10 +81,13 @@ public class CalciteTimeBoundaryQueryTest extends BaseCalciteQueryTest @Test public void testMinTimeQueryWithFilters() throws Exception { - HashMap context = new HashMap<>(QUERY_CONTEXT_DEFAULT); - context.put(TimeBoundaryQuery.MIN_TIME_ARRAY_OUTPUT_NAME, "a0"); + HashMap queryContext = new HashMap<>(QUERY_CONTEXT_DEFAULT); + queryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true); + HashMap expectedContext = new HashMap<>(QUERY_CONTEXT_DEFAULT); + expectedContext.put(TimeBoundaryQuery.MIN_TIME_ARRAY_OUTPUT_NAME, "a0"); testQuery( "SELECT MIN(__time) AS minTime FROM foo where __time >= '2001-01-01' and __time < '2003-01-01'", + queryContext, ImmutableList.of( Druids.newTimeBoundaryQueryBuilder() .dataSource("foo") @@ -87,7 +97,7 @@ public class CalciteTimeBoundaryQueryTest extends BaseCalciteQueryTest ) ) .bound(TimeBoundaryQuery.MIN_TIME) - .context(context) + .context(expectedContext) .build() ), ImmutableList.of(new Object[]{DateTimes.of("2001-01-01").getMillis()}) @@ -99,8 +109,11 @@ public class CalciteTimeBoundaryQueryTest extends BaseCalciteQueryTest @Test public void testMinMaxTimeQuery() throws Exception { + HashMap context = new HashMap<>(QUERY_CONTEXT_DEFAULT); + context.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true); testQuery( "SELECT MIN(__time) AS minTime, MAX(__time) as maxTime FROM foo", + context, ImmutableList.of( Druids.newTimeseriesQueryBuilder() .dataSource("foo") @@ -109,7 +122,7 @@ public class CalciteTimeBoundaryQueryTest extends BaseCalciteQueryTest new LongMinAggregatorFactory("a0", "__time"), new LongMaxAggregatorFactory("a1", "__time") ) - .context(QUERY_CONTEXT_DEFAULT) + .context(context) .build() ), ImmutableList.of(new Object[]{ diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java b/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java index 5169a4d53d5..31b9c316318 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java @@ -65,6 +65,7 @@ public class TestInsertQueryMaker implements QueryMaker // INSERT queries should stick to groupBy, scan. case CAN_RUN_TIMESERIES: case CAN_RUN_TOPN: + case CAN_RUN_TIME_BOUNDARY: return false; // INSERT uses external data. diff --git a/website/.spelling b/website/.spelling index d23986e9a06..7097c7bd269 100644 --- a/website/.spelling +++ b/website/.spelling @@ -560,6 +560,9 @@ useApproximateCountDistinct useGroupingSetForExactDistinct useApproximateTopN wikipedia +enableTimeBoundaryPlanning +TimeBoundary +druid.query.default.context.enableTimeBoundaryPlanning IEC - ../docs/comparisons/druid-vs-elasticsearch.md 100x