Merge pull request #1277 from druid-io/document-query-processors

Add some javadoc to the two Query processing interfaces
This commit is contained in:
Fangjin Yang 2015-04-09 18:19:09 -07:00
commit d0fb5c8458
3 changed files with 150 additions and 8 deletions

View File

@ -22,10 +22,45 @@ import io.druid.segment.Segment;
import java.util.concurrent.ExecutorService;
/**
* An interface that defines the nitty gritty implementation detauls of a Query on a Segment
*/
public interface QueryRunnerFactory<T, QueryType extends Query<T>>
{
/**
* Given a specific segment, this method will create a QueryRunner.
*
* The QueryRunner, when asked, will generate a Sequence of results based on the given segment. This
* is the meat of the query processing and is where the results are actually generated. Everything else
* is just merging and reduction logic.
*
* @param segment The segment to process
* @return A QueryRunner that, when asked, will generate a Sequence of results based on the given segment
*/
public QueryRunner<T> createRunner(Segment segment);
/**
* Runners generated with createRunner() and combined into an Iterable in (time,shardId) order are passed
* along to this method with an ExecutorService. The method should then return a QueryRunner that, when
* asked, will use the ExecutorService to run the base QueryRunners in some fashion.
*
* The vast majority of the time, this should be implemented with
*
* return new ChainedExecutionQueryRunner<>(
* queryExecutor, toolChest.getOrdering(), queryWatcher, queryRunners
* );
*
* Which will allow for parallel execution up to the maximum number of processing threads allowed.
*
* @param queryExecutor ExecutorService to be used for parallel processing
* @param queryRunners Individual QueryRunner objects that produce some results
* @return a QueryRunner that, when asked, will use the ExecutorService to runt he base QueryRunners
*/
public QueryRunner<T> mergeRunners(ExecutorService queryExecutor, Iterable<QueryRunner<T>> queryRunners);
/**
* Provides access to the toolchest for this specific query type.
*
* @return an instance of the toolchest for this specific query type.
*/
public QueryToolChest<T, QueryType> getToolchest();
}

View File

@ -33,53 +33,161 @@ import java.util.List;
*/
public abstract class QueryToolChest<ResultType, QueryType extends Query<ResultType>>
{
/**
* This method wraps a QueryRunner. The input QueryRunner, by contract, will provide a series of
* ResultType objects in time order (ascending). This method should return a new QueryRunner that
* potentially merges the stream of ordered ResultType objects.
*
* @param runner A QueryRunner that provides a series of ResultType objects in time order (ascending)
* @return a QueryRunner that potentialy merges the stream of ordered ResultType objects
*/
public abstract QueryRunner<ResultType> mergeResults(QueryRunner<ResultType> runner);
/**
* This method doesn't belong here, but it's here for now just to make it work.
* This method doesn't belong here, but it's here for now just to make it work. The method needs to
* take a Sequence of Sequences and return a single Sequence of ResultType objects in time-order (ascending)
*
* This method assumes that its input sequences provide values already in sorted order.
* Even more specifically, it assumes that the individual sequences are also ordered by their first element.
*
* In the vast majority of cases, this should just be implemented with:
*
* return new OrderedMergeSequence<>(getOrdering(), seqOfSequences);
*
* @param seqOfSequences sequence of sequences to be merged
*
* @return the sequence of merged results
*/
public abstract Sequence<ResultType> mergeSequences(Sequence<Sequence<ResultType>> seqOfSequences);
/**
* This method doesn't belong here, but it's here for now just to make it work. The method needs to
* take a Sequence of Sequences and return a single Sequence of ResultType objects in time-order (ascending)
*
* This method assumes that its input sequences provide values already in sorted order, but, unlike
* mergeSequences, it does *not* assume that the individual sequences are also ordered by their first element.
*
* In the vast majority if ocases, this hsould just be implemented with:
*
* return new MergeSequence<>(getOrdering(), seqOfSequences);
*
* @param seqOfSequences sequence of sequences to be merged
* @return the sequence of merged results
*/
public abstract Sequence<ResultType> mergeSequencesUnordered(Sequence<Sequence<ResultType>> seqOfSequences);
/**
* Creates a builder that is used to generate a metric for this specific query type. This exists
* to allow for query-specific dimensions on metrics. That is, the ToolChest is expected to set some
* meaningful dimensions for metrics given this query type. Examples might be the topN threshhold for
* a TopN query or the number of dimensions included for a groupBy query.
*
* @param query The query that is being processed
* @return A MetricEvent.Builder that can be used to make metrics for the provided query
*/
public abstract ServiceMetricEvent.Builder makeMetricBuilder(QueryType query);
/**
* Creates a Function that can take in a ResultType and return a new ResultType having applied
* the MetricManipulatorFn to each of the metrics.
*
* This exists because the QueryToolChest is the only thing that understands the internal serialization
* format of ResultType, so it's primary responsibility is to "decompose" that structure and apply the
* given function to all metrics.
*
* This function is called very early in the processing pipeline on the Broker.
*
* @param query The Query that is currently being processed
* @param fn The function that should be applied to all metrics in the results
* @return A function that will apply the provided fn to all metrics in the input ResultType object
*/
public abstract Function<ResultType, ResultType> makePreComputeManipulatorFn(
QueryType query,
MetricManipulationFn fn
);
/**
* Generally speaking this is the exact same thing as makePreComputeManipulatorFn. It is leveraged in
* order to compute PostAggregators on results after they have been completely merged together, which
* should actually be done in the mergeResults() call instead of here.
*
* This should never actually be overridden and it should be removed as quickly as possible.
*
* @param query The Query that is currently being processed
* @param fn The function that should be applied to all metrics in the results
* @return A function that will apply the provided fn to all metrics in the input ResultType object
*/
public Function<ResultType, ResultType> makePostComputeManipulatorFn(QueryType query, MetricManipulationFn fn)
{
return makePreComputeManipulatorFn(query, fn);
}
/**
* Returns a TypeReference object that is just passed through to Jackson in order to deserialize
* the results of this type of query.
*
* @return A TypeReference to indicate to Jackson what type of data will exist for this query
*/
public abstract TypeReference<ResultType> getResultTypeReference();
/**
* Returns a CacheStrategy to be used to load data into the cache and remove it from the cache.
*
* This is optional. If it returns null, caching is effectively disabled for the query.
*
* @param query The query whose results might be cached
* @param <T> The type of object that will be stored in the cache
* @return A CacheStrategy that can be used to populate and read from the Cache
*/
public <T> CacheStrategy<ResultType, T, QueryType> getCacheStrategy(QueryType query)
{
return null;
}
/**
* Wraps a QueryRunner. The input QueryRunner is the QueryRunner as it exists *before* being passed to
* mergeResults().
*
* In fact, the return value of this method is always passed to mergeResults, so it is equivalent to
* just implement this functionality as extra decoration on the QueryRunner during mergeResults().
*
* In the interests of potentially simplifying these interfaces, the recommendation is to actually not
* override this method and instead apply anything that might be needed here in the mergeResults() call.
*
* @param runner The runner to be wrapped
* @return The wrapped runner
*/
public QueryRunner<ResultType> preMergeQueryDecoration(QueryRunner<ResultType> runner)
{
return runner;
}
/**
* Wraps a QueryRunner. The input QueryRunner is the QueryRunner as it exists coming out of mergeResults()
*
* In fact, the input value of this method is always the return value from mergeResults, so it is equivalent
* to just implement this functionality as extra decoration on the QueryRunner during mergeResults().
*
* In the interests of potentially simplifying these interfaces, the recommendation is to actually not
* override this method and instead apply anything that might be needed here in the mergeResults() call.
*
* @param runner The runner to be wrapped
* @return The wrapped runner
*/
public QueryRunner<ResultType> postMergeQueryDecoration(QueryRunner<ResultType> runner)
{
return runner;
}
/**
* @param query
* @param segments list of segments sorted by segment intervals.
* @return list of segments to be queried in order to determine query results.
* This method is called to allow the query to prune segments that it does not believe need to actually
* be queried. It can use whatever criteria it wants in order to do the pruning, it just needs to
* return the list of Segments it actually wants to see queried.
*
* @param query The query being processed
* @param segments The list of candidate segments to be queried
* @param <T> A Generic parameter because Java is cool
* @return The list of segments to actually query
*/
public <T extends LogicalSegment> List<T> filterSegments(QueryType query, List<T> segments)
{

View File

@ -193,7 +193,7 @@ public class TopNQueryQueryToolChest extends QueryToolChest<Result<TopNResultVal
+ prunedAggs.size()
+ 1
);
// JVM couldn't optimize this too well, so this is helping it out a bit.
for (int i = 0; i < aggregatorFactories.length; ++i) {
final String aggName = aggFactoryNames[i];
values.put(aggName, fn.manipulate(aggregatorFactories[i], input.getMetric(aggName)));
@ -253,8 +253,7 @@ public class TopNQueryQueryToolChest extends QueryToolChest<Result<TopNResultVal
+ query.getPostAggregatorSpecs().size()
+ 1
);
// put non finalized aggregators for calculating dependent post Aggregators
// JVM is dumb about optimization
for( int i = 0; i < aggFactoryNames.length; ++i){
final String name = aggFactoryNames[i];
values.put(name, input.getMetric(name));