diff --git a/processing/src/main/java/io/druid/query/QueryRunnerFactory.java b/processing/src/main/java/io/druid/query/QueryRunnerFactory.java index 3ee0f22e545..79eae84718d 100644 --- a/processing/src/main/java/io/druid/query/QueryRunnerFactory.java +++ b/processing/src/main/java/io/druid/query/QueryRunnerFactory.java @@ -22,10 +22,45 @@ import io.druid.segment.Segment; import java.util.concurrent.ExecutorService; /** + * An interface that defines the nitty gritty implementation detauls of a Query on a Segment */ public interface QueryRunnerFactory> { + /** + * Given a specific segment, this method will create a QueryRunner. + * + * The QueryRunner, when asked, will generate a Sequence of results based on the given segment. This + * is the meat of the query processing and is where the results are actually generated. Everything else + * is just merging and reduction logic. + * + * @param segment The segment to process + * @return A QueryRunner that, when asked, will generate a Sequence of results based on the given segment + */ public QueryRunner createRunner(Segment segment); + + /** + * Runners generated with createRunner() and combined into an Iterable in (time,shardId) order are passed + * along to this method with an ExecutorService. The method should then return a QueryRunner that, when + * asked, will use the ExecutorService to run the base QueryRunners in some fashion. + * + * The vast majority of the time, this should be implemented with + * + * return new ChainedExecutionQueryRunner<>( + * queryExecutor, toolChest.getOrdering(), queryWatcher, queryRunners + * ); + * + * Which will allow for parallel execution up to the maximum number of processing threads allowed. + * + * @param queryExecutor ExecutorService to be used for parallel processing + * @param queryRunners Individual QueryRunner objects that produce some results + * @return a QueryRunner that, when asked, will use the ExecutorService to runt he base QueryRunners + */ public QueryRunner mergeRunners(ExecutorService queryExecutor, Iterable> queryRunners); + + /** + * Provides access to the toolchest for this specific query type. + * + * @return an instance of the toolchest for this specific query type. + */ public QueryToolChest getToolchest(); } diff --git a/processing/src/main/java/io/druid/query/QueryToolChest.java b/processing/src/main/java/io/druid/query/QueryToolChest.java index 4b0e576482a..99969536d10 100644 --- a/processing/src/main/java/io/druid/query/QueryToolChest.java +++ b/processing/src/main/java/io/druid/query/QueryToolChest.java @@ -33,53 +33,161 @@ import java.util.List; */ public abstract class QueryToolChest> { + /** + * This method wraps a QueryRunner. The input QueryRunner, by contract, will provide a series of + * ResultType objects in time order (ascending). This method should return a new QueryRunner that + * potentially merges the stream of ordered ResultType objects. + * + * @param runner A QueryRunner that provides a series of ResultType objects in time order (ascending) + * @return a QueryRunner that potentialy merges the stream of ordered ResultType objects + */ public abstract QueryRunner mergeResults(QueryRunner runner); /** - * This method doesn't belong here, but it's here for now just to make it work. + * This method doesn't belong here, but it's here for now just to make it work. The method needs to + * take a Sequence of Sequences and return a single Sequence of ResultType objects in time-order (ascending) + * + * This method assumes that its input sequences provide values already in sorted order. + * Even more specifically, it assumes that the individual sequences are also ordered by their first element. + * + * In the vast majority of cases, this should just be implemented with: + * + * return new OrderedMergeSequence<>(getOrdering(), seqOfSequences); * * @param seqOfSequences sequence of sequences to be merged - * * @return the sequence of merged results */ public abstract Sequence mergeSequences(Sequence> seqOfSequences); + /** + * This method doesn't belong here, but it's here for now just to make it work. The method needs to + * take a Sequence of Sequences and return a single Sequence of ResultType objects in time-order (ascending) + * + * This method assumes that its input sequences provide values already in sorted order, but, unlike + * mergeSequences, it does *not* assume that the individual sequences are also ordered by their first element. + * + * In the vast majority if ocases, this hsould just be implemented with: + * + * return new MergeSequence<>(getOrdering(), seqOfSequences); + * + * @param seqOfSequences sequence of sequences to be merged + * @return the sequence of merged results + */ public abstract Sequence mergeSequencesUnordered(Sequence> seqOfSequences); + /** + * Creates a builder that is used to generate a metric for this specific query type. This exists + * to allow for query-specific dimensions on metrics. That is, the ToolChest is expected to set some + * meaningful dimensions for metrics given this query type. Examples might be the topN threshhold for + * a TopN query or the number of dimensions included for a groupBy query. + * + * @param query The query that is being processed + * @return A MetricEvent.Builder that can be used to make metrics for the provided query + */ public abstract ServiceMetricEvent.Builder makeMetricBuilder(QueryType query); + /** + * Creates a Function that can take in a ResultType and return a new ResultType having applied + * the MetricManipulatorFn to each of the metrics. + * + * This exists because the QueryToolChest is the only thing that understands the internal serialization + * format of ResultType, so it's primary responsibility is to "decompose" that structure and apply the + * given function to all metrics. + * + * This function is called very early in the processing pipeline on the Broker. + * + * @param query The Query that is currently being processed + * @param fn The function that should be applied to all metrics in the results + * @return A function that will apply the provided fn to all metrics in the input ResultType object + */ public abstract Function makePreComputeManipulatorFn( QueryType query, MetricManipulationFn fn ); + /** + * Generally speaking this is the exact same thing as makePreComputeManipulatorFn. It is leveraged in + * order to compute PostAggregators on results after they have been completely merged together, which + * should actually be done in the mergeResults() call instead of here. + * + * This should never actually be overridden and it should be removed as quickly as possible. + * + * @param query The Query that is currently being processed + * @param fn The function that should be applied to all metrics in the results + * @return A function that will apply the provided fn to all metrics in the input ResultType object + */ public Function makePostComputeManipulatorFn(QueryType query, MetricManipulationFn fn) { return makePreComputeManipulatorFn(query, fn); } + /** + * Returns a TypeReference object that is just passed through to Jackson in order to deserialize + * the results of this type of query. + * + * @return A TypeReference to indicate to Jackson what type of data will exist for this query + */ public abstract TypeReference getResultTypeReference(); + /** + * Returns a CacheStrategy to be used to load data into the cache and remove it from the cache. + * + * This is optional. If it returns null, caching is effectively disabled for the query. + * + * @param query The query whose results might be cached + * @param The type of object that will be stored in the cache + * @return A CacheStrategy that can be used to populate and read from the Cache + */ public CacheStrategy getCacheStrategy(QueryType query) { return null; } + /** + * Wraps a QueryRunner. The input QueryRunner is the QueryRunner as it exists *before* being passed to + * mergeResults(). + * + * In fact, the return value of this method is always passed to mergeResults, so it is equivalent to + * just implement this functionality as extra decoration on the QueryRunner during mergeResults(). + * + * In the interests of potentially simplifying these interfaces, the recommendation is to actually not + * override this method and instead apply anything that might be needed here in the mergeResults() call. + * + * @param runner The runner to be wrapped + * @return The wrapped runner + */ public QueryRunner preMergeQueryDecoration(QueryRunner runner) { return runner; } + /** + * Wraps a QueryRunner. The input QueryRunner is the QueryRunner as it exists coming out of mergeResults() + * + * In fact, the input value of this method is always the return value from mergeResults, so it is equivalent + * to just implement this functionality as extra decoration on the QueryRunner during mergeResults(). + * + * In the interests of potentially simplifying these interfaces, the recommendation is to actually not + * override this method and instead apply anything that might be needed here in the mergeResults() call. + * + * @param runner The runner to be wrapped + * @return The wrapped runner + */ public QueryRunner postMergeQueryDecoration(QueryRunner runner) { return runner; } /** - * @param query - * @param segments list of segments sorted by segment intervals. - * @return list of segments to be queried in order to determine query results. + * This method is called to allow the query to prune segments that it does not believe need to actually + * be queried. It can use whatever criteria it wants in order to do the pruning, it just needs to + * return the list of Segments it actually wants to see queried. + * + * @param query The query being processed + * @param segments The list of candidate segments to be queried + * @param A Generic parameter because Java is cool + * @return The list of segments to actually query */ public List filterSegments(QueryType query, List segments) { diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java index d8f1acd67dc..a6569b52e39 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java +++ b/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java @@ -193,7 +193,7 @@ public class TopNQueryQueryToolChest extends QueryToolChest