From 7517f0d0f01507decefe1283c12640ab578b62ba Mon Sep 17 00:00:00 2001 From: Eric Tschetter Date: Thu, 9 Apr 2015 18:11:42 -0700 Subject: [PATCH] Add some javadoc to the two Query processing interfaces to help aid in implementations of new Queries. Also, remove some comments that did not have enough context to actually make sense to anyone but the original author (at least, I hope they make sense to the author, I definitely don't know what was being said). --- .../io/druid/query/QueryRunnerFactory.java | 35 ++++++ .../java/io/druid/query/QueryToolChest.java | 118 +++++++++++++++++- .../query/topn/TopNQueryQueryToolChest.java | 5 +- 3 files changed, 150 insertions(+), 8 deletions(-) diff --git a/processing/src/main/java/io/druid/query/QueryRunnerFactory.java b/processing/src/main/java/io/druid/query/QueryRunnerFactory.java index 3ee0f22e545..79eae84718d 100644 --- a/processing/src/main/java/io/druid/query/QueryRunnerFactory.java +++ b/processing/src/main/java/io/druid/query/QueryRunnerFactory.java @@ -22,10 +22,45 @@ import io.druid.segment.Segment; import java.util.concurrent.ExecutorService; /** + * An interface that defines the nitty gritty implementation detauls of a Query on a Segment */ public interface QueryRunnerFactory> { + /** + * Given a specific segment, this method will create a QueryRunner. + * + * The QueryRunner, when asked, will generate a Sequence of results based on the given segment. This + * is the meat of the query processing and is where the results are actually generated. Everything else + * is just merging and reduction logic. + * + * @param segment The segment to process + * @return A QueryRunner that, when asked, will generate a Sequence of results based on the given segment + */ public QueryRunner createRunner(Segment segment); + + /** + * Runners generated with createRunner() and combined into an Iterable in (time,shardId) order are passed + * along to this method with an ExecutorService. The method should then return a QueryRunner that, when + * asked, will use the ExecutorService to run the base QueryRunners in some fashion. + * + * The vast majority of the time, this should be implemented with + * + * return new ChainedExecutionQueryRunner<>( + * queryExecutor, toolChest.getOrdering(), queryWatcher, queryRunners + * ); + * + * Which will allow for parallel execution up to the maximum number of processing threads allowed. + * + * @param queryExecutor ExecutorService to be used for parallel processing + * @param queryRunners Individual QueryRunner objects that produce some results + * @return a QueryRunner that, when asked, will use the ExecutorService to runt he base QueryRunners + */ public QueryRunner mergeRunners(ExecutorService queryExecutor, Iterable> queryRunners); + + /** + * Provides access to the toolchest for this specific query type. + * + * @return an instance of the toolchest for this specific query type. + */ public QueryToolChest getToolchest(); } diff --git a/processing/src/main/java/io/druid/query/QueryToolChest.java b/processing/src/main/java/io/druid/query/QueryToolChest.java index 4b0e576482a..99969536d10 100644 --- a/processing/src/main/java/io/druid/query/QueryToolChest.java +++ b/processing/src/main/java/io/druid/query/QueryToolChest.java @@ -33,53 +33,161 @@ import java.util.List; */ public abstract class QueryToolChest> { + /** + * This method wraps a QueryRunner. The input QueryRunner, by contract, will provide a series of + * ResultType objects in time order (ascending). This method should return a new QueryRunner that + * potentially merges the stream of ordered ResultType objects. + * + * @param runner A QueryRunner that provides a series of ResultType objects in time order (ascending) + * @return a QueryRunner that potentialy merges the stream of ordered ResultType objects + */ public abstract QueryRunner mergeResults(QueryRunner runner); /** - * This method doesn't belong here, but it's here for now just to make it work. + * This method doesn't belong here, but it's here for now just to make it work. The method needs to + * take a Sequence of Sequences and return a single Sequence of ResultType objects in time-order (ascending) + * + * This method assumes that its input sequences provide values already in sorted order. + * Even more specifically, it assumes that the individual sequences are also ordered by their first element. + * + * In the vast majority of cases, this should just be implemented with: + * + * return new OrderedMergeSequence<>(getOrdering(), seqOfSequences); * * @param seqOfSequences sequence of sequences to be merged - * * @return the sequence of merged results */ public abstract Sequence mergeSequences(Sequence> seqOfSequences); + /** + * This method doesn't belong here, but it's here for now just to make it work. The method needs to + * take a Sequence of Sequences and return a single Sequence of ResultType objects in time-order (ascending) + * + * This method assumes that its input sequences provide values already in sorted order, but, unlike + * mergeSequences, it does *not* assume that the individual sequences are also ordered by their first element. + * + * In the vast majority if ocases, this hsould just be implemented with: + * + * return new MergeSequence<>(getOrdering(), seqOfSequences); + * + * @param seqOfSequences sequence of sequences to be merged + * @return the sequence of merged results + */ public abstract Sequence mergeSequencesUnordered(Sequence> seqOfSequences); + /** + * Creates a builder that is used to generate a metric for this specific query type. This exists + * to allow for query-specific dimensions on metrics. That is, the ToolChest is expected to set some + * meaningful dimensions for metrics given this query type. Examples might be the topN threshhold for + * a TopN query or the number of dimensions included for a groupBy query. + * + * @param query The query that is being processed + * @return A MetricEvent.Builder that can be used to make metrics for the provided query + */ public abstract ServiceMetricEvent.Builder makeMetricBuilder(QueryType query); + /** + * Creates a Function that can take in a ResultType and return a new ResultType having applied + * the MetricManipulatorFn to each of the metrics. + * + * This exists because the QueryToolChest is the only thing that understands the internal serialization + * format of ResultType, so it's primary responsibility is to "decompose" that structure and apply the + * given function to all metrics. + * + * This function is called very early in the processing pipeline on the Broker. + * + * @param query The Query that is currently being processed + * @param fn The function that should be applied to all metrics in the results + * @return A function that will apply the provided fn to all metrics in the input ResultType object + */ public abstract Function makePreComputeManipulatorFn( QueryType query, MetricManipulationFn fn ); + /** + * Generally speaking this is the exact same thing as makePreComputeManipulatorFn. It is leveraged in + * order to compute PostAggregators on results after they have been completely merged together, which + * should actually be done in the mergeResults() call instead of here. + * + * This should never actually be overridden and it should be removed as quickly as possible. + * + * @param query The Query that is currently being processed + * @param fn The function that should be applied to all metrics in the results + * @return A function that will apply the provided fn to all metrics in the input ResultType object + */ public Function makePostComputeManipulatorFn(QueryType query, MetricManipulationFn fn) { return makePreComputeManipulatorFn(query, fn); } + /** + * Returns a TypeReference object that is just passed through to Jackson in order to deserialize + * the results of this type of query. + * + * @return A TypeReference to indicate to Jackson what type of data will exist for this query + */ public abstract TypeReference getResultTypeReference(); + /** + * Returns a CacheStrategy to be used to load data into the cache and remove it from the cache. + * + * This is optional. If it returns null, caching is effectively disabled for the query. + * + * @param query The query whose results might be cached + * @param The type of object that will be stored in the cache + * @return A CacheStrategy that can be used to populate and read from the Cache + */ public CacheStrategy getCacheStrategy(QueryType query) { return null; } + /** + * Wraps a QueryRunner. The input QueryRunner is the QueryRunner as it exists *before* being passed to + * mergeResults(). + * + * In fact, the return value of this method is always passed to mergeResults, so it is equivalent to + * just implement this functionality as extra decoration on the QueryRunner during mergeResults(). + * + * In the interests of potentially simplifying these interfaces, the recommendation is to actually not + * override this method and instead apply anything that might be needed here in the mergeResults() call. + * + * @param runner The runner to be wrapped + * @return The wrapped runner + */ public QueryRunner preMergeQueryDecoration(QueryRunner runner) { return runner; } + /** + * Wraps a QueryRunner. The input QueryRunner is the QueryRunner as it exists coming out of mergeResults() + * + * In fact, the input value of this method is always the return value from mergeResults, so it is equivalent + * to just implement this functionality as extra decoration on the QueryRunner during mergeResults(). + * + * In the interests of potentially simplifying these interfaces, the recommendation is to actually not + * override this method and instead apply anything that might be needed here in the mergeResults() call. + * + * @param runner The runner to be wrapped + * @return The wrapped runner + */ public QueryRunner postMergeQueryDecoration(QueryRunner runner) { return runner; } /** - * @param query - * @param segments list of segments sorted by segment intervals. - * @return list of segments to be queried in order to determine query results. + * This method is called to allow the query to prune segments that it does not believe need to actually + * be queried. It can use whatever criteria it wants in order to do the pruning, it just needs to + * return the list of Segments it actually wants to see queried. + * + * @param query The query being processed + * @param segments The list of candidate segments to be queried + * @param A Generic parameter because Java is cool + * @return The list of segments to actually query */ public List filterSegments(QueryType query, List segments) { diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java index d8f1acd67dc..a6569b52e39 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java +++ b/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java @@ -193,7 +193,7 @@ public class TopNQueryQueryToolChest extends QueryToolChest