Add Finalization option to RowSignature.addAggregators. (#11882)

* Add Finalization option to RowSignature.addAggregators.

This make type signatures more useful when the caller knows whether it will
be reading aggregation results in their finalized or intermediate types.

* Fix call site.
This commit is contained in:
Gian Merlino 2021-11-10 06:05:29 -08:00 committed by GitHub
parent d3914c1a78
commit db4d157be6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 125 additions and 28 deletions

View File

@ -222,7 +222,7 @@ public class GroupByQuery extends BaseQuery<ResultRow>
verifyOutputNames(this.dimensions, this.aggregatorSpecs, this.postAggregatorSpecs); verifyOutputNames(this.dimensions, this.aggregatorSpecs, this.postAggregatorSpecs);
this.universalTimestamp = computeUniversalTimestamp(); this.universalTimestamp = computeUniversalTimestamp();
this.resultRowSignature = computeResultRowSignature(); this.resultRowSignature = computeResultRowSignature(RowSignature.Finalization.UNKNOWN);
this.havingSpec = havingSpec; this.havingSpec = havingSpec;
this.limitSpec = LimitSpec.nullToNoopLimitSpec(limitSpec); this.limitSpec = LimitSpec.nullToNoopLimitSpec(limitSpec);
this.subtotalsSpec = verifySubtotalsSpec(subtotalsSpec, this.dimensions); this.subtotalsSpec = verifySubtotalsSpec(subtotalsSpec, this.dimensions);
@ -320,8 +320,7 @@ public class GroupByQuery extends BaseQuery<ResultRow>
} }
/** /**
* Returns a list of field names, of the same size as {@link #getResultRowSizeWithPostAggregators()}, in the * Equivalent to {@code getResultRowSignature(Finalization.UNKNOWN)}.
* order that they will appear in ResultRows for this query.
* *
* @see ResultRow for documentation about the order that fields will be in * @see ResultRow for documentation about the order that fields will be in
*/ */
@ -330,6 +329,25 @@ public class GroupByQuery extends BaseQuery<ResultRow>
return resultRowSignature; return resultRowSignature;
} }
/**
* Returns a result row signature, of the same size as {@link #getResultRowSizeWithPostAggregators()}, in the
* order that they will appear in ResultRows for this query.
*
* Aggregator types in the signature depend on the value of {@code finalization}.
*
* If finalization is {@link RowSignature.Finalization#UNKNOWN}, this method returns a cached object.
*
* @see ResultRow for documentation about the order that fields will be in
*/
public RowSignature getResultRowSignature(final RowSignature.Finalization finalization)
{
if (finalization == RowSignature.Finalization.UNKNOWN) {
return resultRowSignature;
} else {
return computeResultRowSignature(finalization);
}
}
/** /**
* Returns the size of ResultRows for this query when they do not include post-aggregators. * Returns the size of ResultRows for this query when they do not include post-aggregators.
*/ */
@ -481,7 +499,7 @@ public class GroupByQuery extends BaseQuery<ResultRow>
return forcePushDown; return forcePushDown;
} }
private RowSignature computeResultRowSignature() private RowSignature computeResultRowSignature(final RowSignature.Finalization finalization)
{ {
final RowSignature.Builder builder = RowSignature.builder(); final RowSignature.Builder builder = RowSignature.builder();
@ -490,7 +508,7 @@ public class GroupByQuery extends BaseQuery<ResultRow>
} }
return builder.addDimensions(dimensions) return builder.addDimensions(dimensions)
.addAggregators(aggregatorSpecs) .addAggregators(aggregatorSpecs, finalization)
.addPostAggregators(postAggregatorSpecs) .addPostAggregators(postAggregatorSpecs)
.build(); .build();
} }

View File

@ -68,6 +68,7 @@ import org.apache.druid.segment.RowAdapter;
import org.apache.druid.segment.RowBasedColumnSelectorFactory; import org.apache.druid.segment.RowBasedColumnSelectorFactory;
import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.filter.BooleanValueMatcher; import org.apache.druid.segment.filter.BooleanValueMatcher;
@ -203,7 +204,8 @@ public class RowBasedGrouperHelper
ColumnSelectorFactory columnSelectorFactory = createResultRowBasedColumnSelectorFactory( ColumnSelectorFactory columnSelectorFactory = createResultRowBasedColumnSelectorFactory(
combining ? query : subquery, combining ? query : subquery,
columnSelectorRow::get columnSelectorRow::get,
RowSignature.Finalization.UNKNOWN
); );
// Apply virtual columns if we are in subquery (non-combining) mode. // Apply virtual columns if we are in subquery (non-combining) mode.
@ -343,12 +345,16 @@ public class RowBasedGrouperHelper
* *
* @param query a groupBy query * @param query a groupBy query
* @param supplier supplier of result rows from the query * @param supplier supplier of result rows from the query
* @param finalization whether the column capabilities reported by this factory should reflect finalized types
*/ */
public static ColumnSelectorFactory createResultRowBasedColumnSelectorFactory( public static ColumnSelectorFactory createResultRowBasedColumnSelectorFactory(
final GroupByQuery query, final GroupByQuery query,
final Supplier<ResultRow> supplier final Supplier<ResultRow> supplier,
final RowSignature.Finalization finalization
) )
{ {
final RowSignature signature = query.getResultRowSignature(finalization);
final RowAdapter<ResultRow> adapter = final RowAdapter<ResultRow> adapter =
new RowAdapter<ResultRow>() new RowAdapter<ResultRow>()
{ {
@ -366,7 +372,7 @@ public class RowBasedGrouperHelper
@Override @Override
public Function<ResultRow, Object> columnFunction(final String columnName) public Function<ResultRow, Object> columnFunction(final String columnName)
{ {
final int columnIndex = query.getResultRowSignature().indexOf(columnName); final int columnIndex = signature.indexOf(columnName);
if (columnIndex < 0) { if (columnIndex < 0) {
return row -> null; return row -> null;
} else { } else {
@ -378,7 +384,7 @@ public class RowBasedGrouperHelper
return RowBasedColumnSelectorFactory.create( return RowBasedColumnSelectorFactory.create(
adapter, adapter,
supplier::get, supplier::get,
() -> query.getResultRowSignature(), () -> signature,
false false
); );
} }
@ -400,7 +406,14 @@ public class RowBasedGrouperHelper
final SettableSupplier<ResultRow> rowSupplier = new SettableSupplier<>(); final SettableSupplier<ResultRow> rowSupplier = new SettableSupplier<>();
final ColumnSelectorFactory columnSelectorFactory = final ColumnSelectorFactory columnSelectorFactory =
query.getVirtualColumns().wrap(RowBasedGrouperHelper.createResultRowBasedColumnSelectorFactory(subquery, rowSupplier)); query.getVirtualColumns()
.wrap(
RowBasedGrouperHelper.createResultRowBasedColumnSelectorFactory(
subquery,
rowSupplier,
RowSignature.Finalization.UNKNOWN
)
);
final ValueMatcher filterMatcher = filter == null final ValueMatcher filterMatcher = filter == null
? BooleanValueMatcher.of(true) ? BooleanValueMatcher.of(true)
@ -965,7 +978,12 @@ public class RowBasedGrouperHelper
} }
} }
private static int compareDimsInRows(RowBasedKey key1, RowBasedKey key2, final List<ColumnType> fieldTypes, int dimStart) private static int compareDimsInRows(
RowBasedKey key1,
RowBasedKey key2,
final List<ColumnType> fieldTypes,
int dimStart
)
{ {
for (int i = dimStart; i < key1.getKey().length; i++) { for (int i = dimStart; i < key1.getKey().length; i++) {
final int cmp; final int cmp;
@ -1337,7 +1355,12 @@ public class RowBasedGrouperHelper
case LONG: case LONG:
case FLOAT: case FLOAT:
case DOUBLE: case DOUBLE:
return makeNullHandlingNumericserdeHelper(valueType.getType(), keyBufferPosition, pushLimitDown, stringComparator); return makeNullHandlingNumericserdeHelper(
valueType.getType(),
keyBufferPosition,
pushLimitDown,
stringComparator
);
default: default:
throw new IAE("invalid type: %s", valueType); throw new IAE("invalid type: %s", valueType);
} }

View File

@ -32,6 +32,7 @@ import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.groupby.epinephelinae.RowBasedGrouperHelper; import org.apache.druid.query.groupby.epinephelinae.RowBasedGrouperHelper;
import org.apache.druid.segment.column.RowSignature;
import java.util.Objects; import java.util.Objects;
import java.util.function.Function; import java.util.function.Function;
@ -83,7 +84,8 @@ public class DimFilterHavingSpec implements HavingSpec
this.matcher = dimFilter.toFilter().makeMatcher( this.matcher = dimFilter.toFilter().makeMatcher(
RowBasedGrouperHelper.createResultRowBasedColumnSelectorFactory( RowBasedGrouperHelper.createResultRowBasedColumnSelectorFactory(
query, query,
rowSupplier rowSupplier,
finalize ? RowSignature.Finalization.YES : RowSignature.Finalization.NO
) )
); );
} }

View File

@ -223,6 +223,8 @@ public class TimeseriesQueryQueryToolChest extends QueryToolChest<Result<Timeser
List<AggregatorFactory> aggregatorSpecs = query.getAggregatorSpecs(); List<AggregatorFactory> aggregatorSpecs = query.getAggregatorSpecs();
Aggregator[] aggregators = new Aggregator[aggregatorSpecs.size()]; Aggregator[] aggregators = new Aggregator[aggregatorSpecs.size()];
String[] aggregatorNames = new String[aggregatorSpecs.size()]; String[] aggregatorNames = new String[aggregatorSpecs.size()];
RowSignature aggregatorsSignature =
RowSignature.builder().addAggregators(aggregatorSpecs, RowSignature.Finalization.UNKNOWN).build();
for (int i = 0; i < aggregatorSpecs.size(); i++) { for (int i = 0; i < aggregatorSpecs.size(); i++) {
aggregators[i] = aggregators[i] =
aggregatorSpecs.get(i) aggregatorSpecs.get(i)
@ -230,7 +232,7 @@ public class TimeseriesQueryQueryToolChest extends QueryToolChest<Result<Timeser
RowBasedColumnSelectorFactory.create( RowBasedColumnSelectorFactory.create(
RowAdapters.standardRow(), RowAdapters.standardRow(),
() -> new MapBasedRow(null, null), () -> new MapBasedRow(null, null),
() -> RowSignature.builder().addAggregators(aggregatorSpecs).build(), () -> aggregatorsSignature,
false false
) )
); );
@ -417,7 +419,7 @@ public class TimeseriesQueryQueryToolChest extends QueryToolChest<Result<Timeser
if (StringUtils.isNotEmpty(query.getTimestampResultField())) { if (StringUtils.isNotEmpty(query.getTimestampResultField())) {
rowSignatureBuilder.add(query.getTimestampResultField(), ColumnType.LONG); rowSignatureBuilder.add(query.getTimestampResultField(), ColumnType.LONG);
} }
rowSignatureBuilder.addAggregators(query.getAggregatorSpecs()); rowSignatureBuilder.addAggregators(query.getAggregatorSpecs(), RowSignature.Finalization.UNKNOWN);
rowSignatureBuilder.addPostAggregators(query.getPostAggregatorSpecs()); rowSignatureBuilder.addPostAggregators(query.getPostAggregatorSpecs());
return rowSignatureBuilder.build(); return rowSignatureBuilder.build();
} }

View File

@ -511,7 +511,7 @@ public class TopNQueryQueryToolChest extends QueryToolChest<Result<TopNResultVal
return RowSignature.builder() return RowSignature.builder()
.addTimeColumn() .addTimeColumn()
.addDimensions(Collections.singletonList(query.getDimensionSpec())) .addDimensions(Collections.singletonList(query.getDimensionSpec()))
.addAggregators(query.getAggregatorSpecs()) .addAggregators(query.getAggregatorSpecs(), RowSignature.Finalization.UNKNOWN)
.addPostAggregators(query.getPostAggregatorSpecs()) .addPostAggregators(query.getPostAggregatorSpecs())
.build(); .build();
} }

View File

@ -188,6 +188,7 @@ public class RowSignature implements ColumnInspector
} }
final String columnName = columnNames.get(i); final String columnName = columnNames.get(i);
s.append(columnName).append(":").append(columnTypes.get(columnName)); s.append(columnName).append(":").append(columnTypes.get(columnName));
} }
return s.append("}").toString(); return s.append("}").toString();
} }
@ -249,24 +250,57 @@ public class RowSignature implements ColumnInspector
return this; return this;
} }
public Builder addAggregators(final List<AggregatorFactory> aggregators) /**
* Adds aggregations to a signature.
*
* {@link Finalization#YES} will add finalized types and {@link Finalization#NO} will add intermediate types.
* {@link Finalization#UNKNOWN} will add the intermediate / finalized type when they are the same. Otherwise, it
* will add a null type.
*
* @param aggregators list of aggregation functions
* @param finalization whether the aggregator results will be finalized
*/
public Builder addAggregators(final List<AggregatorFactory> aggregators, final Finalization finalization)
{ {
for (final AggregatorFactory aggregator : aggregators) { for (final AggregatorFactory aggregator : aggregators) {
final ColumnType type = aggregator.getType(); final ColumnType type;
if (type.equals(aggregator.getFinalizedType())) { switch (finalization) {
add(aggregator.getName(), type); case YES:
type = aggregator.getFinalizedType();
break;
case NO:
type = aggregator.getType();
break;
default:
assert finalization == Finalization.UNKNOWN;
if (aggregator.getType() == aggregator.getFinalizedType()) {
type = aggregator.getType();
} else { } else {
// Use null if the type depends on whether or not the aggregator is finalized, since // Use null if the type depends on whether the aggregator is finalized, since we don't know if
// we don't know if it will be finalized or not. So null (i.e. unknown) is the proper // it will be finalized or not.
// thing to do (currently). type = null;
add(aggregator.getName(), null);
} }
break;
}
add(aggregator.getName(), type);
} }
return this; return this;
} }
/**
* Adds post-aggregators to a signature.
*
* Note: to ensure types are computed properly, post-aggregators must be added *after* any columns that they
* depend on, and they must be added in the order that the query engine will compute them. This method assumes
* that post-aggregators are computed in order, and that they can refer to earlier post-aggregators but not
* to later ones.
*/
public Builder addPostAggregators(final List<PostAggregator> postAggregators) public Builder addPostAggregators(final List<PostAggregator> postAggregators)
{ {
for (final PostAggregator postAggregator : postAggregators) { for (final PostAggregator postAggregator : postAggregators) {
@ -289,4 +323,22 @@ public class RowSignature implements ColumnInspector
return new RowSignature(columnTypeList); return new RowSignature(columnTypeList);
} }
} }
public enum Finalization
{
/**
* Aggregation results will be finalized.
*/
YES,
/**
* Aggregation results will not be finalized.
*/
NO,
/**
* Aggregation results may or may not be finalized.
*/
UNKNOWN
}
} }