diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorFactory.java index 6ec9fde8fcb..6b0f4a11c94 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorFactory.java @@ -94,20 +94,45 @@ public abstract class AggregatorFactory implements Cacheable } /** - * Returns an AggregatorFactory that can be used to combine the output of aggregators from this factory. This - * generally amounts to simply creating a new factory that is the same as the current except with its input - * column renamed to the same as the output column. + * Returns an AggregatorFactory that can be used to combine the output of aggregators from this factory. It is used + * when we know we have some values that were produced with this aggregator factory, and want to do some additional + * combining of them. This happens, for example, when merging query results from two different segments, or two + * different servers. + * + * For simple aggregators, the combining factory may be computed by simply creating a new factory that is the same as + * the current, except with its input column renamed to the same as the output column. For example, this aggregator: + * + * {"type": "longSum", "fieldName": "foo", "name": "bar"} + * + * Would become: + * + * {"type": "longSum", "fieldName": "bar", "name": "bar"} + * + * Sometimes, the type or other parameters of the combining aggregator will be different from the original aggregator. + * For example, the {@link CountAggregatorFactory} getCombiningFactory method will return a + * {@link LongSumAggregatorFactory}, because counts are combined by summing. + * + * No matter what, `foo.getCombiningFactory()` and `foo.getCombiningFactory().getCombiningFactory()` should return + * the same result. * * @return a new Factory that can be used for operations on top of data output from the current factory. */ public abstract AggregatorFactory getCombiningFactory(); /** - * Returns an AggregatorFactory that can be used to merge the output of aggregators from this factory and - * other factory. - * This method is relevant only for AggregatorFactory which can be used at ingestion time. + * Returns an AggregatorFactory that can be used to combine the output of aggregators from this factory and + * another factory. It is used when we have some values produced by this aggregator factory, and some values produced + * by the "other" aggregator factory, and we want to do some additional combining of them. This happens, for example, + * when compacting two segments together that both have a metric column with the same name. (Even though the name of + * the column is the same, the aggregator factory used to create it may be different from segment to segment.) + * + * This method may throw {@link AggregatorFactoryNotMergeableException}, meaning that "this" and "other" are not + * compatible and values from one cannot sensibly be combined with values from the other. * * @return a new Factory that can be used for merging the output of aggregators from this factory and other. + * + * @see #getCombiningFactory() which is equivalent to {@code foo.getMergingFactory(foo)} (when "this" and "other" + * are the same instance). */ public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException { @@ -120,9 +145,15 @@ public abstract class AggregatorFactory implements Cacheable } /** - * Gets a list of all columns that this AggregatorFactory will scan + * Used by {@link org.apache.druid.query.groupby.strategy.GroupByStrategyV1} when running nested groupBys, to + * "transfer" values from this aggreagtor to an incremental index that the outer query will run on. This method + * only exists due to the design of GroupByStrategyV1, and should probably not be used for anything else. If you are + * here because you are looking for a way to get the input fields required by this aggregator, and thought + * "getRequiredColumns" sounded right, please use {@link #requiredFields()} instead. * - * @return AggregatorFactories for the columns to scan of the parent AggregatorFactory + * @return AggregatorFactories that can be used to "transfer" values from this aggregator into an incremental index + * + * @see #requiredFields() a similarly-named method that is perhaps the one you want instead. */ public abstract List getRequiredColumns(); @@ -149,6 +180,9 @@ public abstract class AggregatorFactory implements Cacheable public abstract String getName(); + /** + * Get a list of fields that aggregators built by this factory will need to read. + */ public abstract List requiredFields(); public abstract String getTypeName();