minor adjustments for performance (#16714)

changes:
* stop using StringUtils.format in a few places
* switch some streams to classic loops (a sketch of the pattern follows below)
Clint Wylie 2024-07-11 16:57:15 -07:00 committed by GitHub
parent 307b8849de
commit dca31d466c
7 changed files with 73 additions and 64 deletions
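
The stream-to-loop change is mechanical: a stream allMatch call builds a Stream pipeline and evaluates a lambda per element, while a classic loop keeps the same short-circuit semantics with plain control flow. Below is a minimal sketch of the pattern, assuming a hypothetical Checkable stand-in for types like DimensionSpec or AggregatorFactory (not part of the Druid codebase):

import java.util.List;

public class AllMatchSketch
{
  // hypothetical stand-in for the DimensionSpec / AggregatorFactory / VirtualColumn checks
  interface Checkable
  {
    boolean check();
  }

  // before: stream().allMatch() builds a Stream pipeline on every invocation
  static boolean allMatchStream(List<Checkable> items)
  {
    return items.stream().allMatch(Checkable::check);
  }

  // after: a classic loop with identical short-circuit semantics and no pipeline overhead
  static boolean allMatchLoop(List<Checkable> items)
  {
    for (Checkable item : items) {
      if (!item.check()) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args)
  {
    List<Checkable> items = List.of(() -> true, () -> false);
    // both print false: the rewrite changes overhead, not behavior
    System.out.println(allMatchStream(items));
    System.out.println(allMatchLoop(items));
  }
}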


@@ -211,24 +211,25 @@ public class GroupByQueryEngine
       final List<DimensionSpec> dimensions
   )
   {
-    return dimensions
-        .stream()
-        .allMatch(
-            dimension -> {
-              if (dimension.mustDecorate()) {
-                // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors.
-                // To be safe, we must return false here.
-                return false;
-              }
-
-              // Now check column capabilities, which must be present and explicitly not multi-valued and not arrays
-              final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension());
-              return dimension.getOutputType().isArray()
-                     || (columnCapabilities != null
-                         && columnCapabilities.hasMultipleValues().isFalse()
-                         && !columnCapabilities.isArray()
-                     );
-            });
+    for (DimensionSpec dimension : dimensions) {
+      if (dimension.mustDecorate()) {
+        // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors.
+        // To be safe, we must return false here.
+        return false;
+      }
+      // if dimension spec type is array, skip it since we can handle array or multi-valued
+      if (dimension.getOutputType().isArray()) {
+        continue;
+      }
+      // Now check column capabilities, which must be present and explicitly not multi-valued and not arrays
+      final ColumnCapabilities capabilities = inspector.getColumnCapabilities(dimension.getDimension());
+      if (capabilities == null || capabilities.hasMultipleValues().isMaybeTrue() || capabilities.isArray()) {
+        return false;
+      }
+    }
+    return true;
   }

   private abstract static class GroupByEngineIterator<KeyType> implements Iterator<ResultRow>, Closeable


@@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.io.Closer;
 import org.apache.druid.java.util.common.parsers.CloseableIterator;
 import org.apache.druid.query.DruidProcessingConfig;
 import org.apache.druid.query.aggregation.AggregatorAdapters;
+import org.apache.druid.query.aggregation.AggregatorFactory;
 import org.apache.druid.query.dimension.DefaultDimensionSpec;
 import org.apache.druid.query.dimension.DimensionSpec;
 import org.apache.druid.query.filter.Filter;
@@ -204,9 +205,7 @@ public class VectorGroupByEngine
     return adapter.canVectorize(filter, query.getVirtualColumns(), false)
            && canVectorizeDimensions(inspector, query.getDimensions())
            && VirtualColumns.shouldVectorize(query, query.getVirtualColumns(), adapter)
-           && query.getAggregatorSpecs()
-                   .stream()
-                   .allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(inspector));
+           && canVectorizeAggregators(inspector, query.getAggregatorSpecs());
   }

   private static boolean canVectorizeDimensions(
@@ -214,10 +213,7 @@ public class VectorGroupByEngine
       final List<DimensionSpec> dimensions
   )
   {
-    return dimensions
-        .stream()
-        .allMatch(
-            dimension -> {
-              if (!dimension.canVectorize()) {
-                return false;
-              }
+    for (DimensionSpec dimension : dimensions) {
+      if (!dimension.canVectorize()) {
+        return false;
+      }
@@ -236,13 +232,26 @@ public class VectorGroupByEngine
-              // Now check column capabilities.
-              final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension());
-
-              // null here currently means the column does not exist, nil columns can be vectorized
-              if (columnCapabilities == null) {
-                return true;
-              }
-
-              // must be single valued
-              return columnCapabilities.hasMultipleValues().isFalse();
-            });
+      // Now check column capabilities.
+      final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension());
+      // null here currently means the column does not exist, nil columns can be vectorized
+      if (columnCapabilities != null && columnCapabilities.hasMultipleValues().isMaybeTrue()) {
+        // multi-value columns implicit unnest is not currently supported in the vector processing engine
+        return false;
+      }
+    }
+    return true;
   }
+
+  public static boolean canVectorizeAggregators(
+      final ColumnInspector inspector,
+      final List<AggregatorFactory> aggregatorFactories
+  )
+  {
+    for (AggregatorFactory aggregatorFactory : aggregatorFactories) {
+      if (!aggregatorFactory.canVectorize(inspector)) {
+        return false;
+      }
+    }
+    return true;
+  }

   @VisibleForTesting


@@ -37,6 +37,7 @@ import org.apache.druid.query.aggregation.Aggregator;
 import org.apache.druid.query.aggregation.AggregatorAdapters;
 import org.apache.druid.query.aggregation.AggregatorFactory;
 import org.apache.druid.query.filter.Filter;
+import org.apache.druid.query.groupby.epinephelinae.vector.VectorGroupByEngine;
 import org.apache.druid.query.vector.VectorCursorGranularizer;
 import org.apache.druid.segment.ColumnInspector;
 import org.apache.druid.segment.SegmentMissingException;
@@ -103,7 +104,7 @@ public class TimeseriesQueryEngine
     final boolean doVectorize = query.context().getVectorize().shouldVectorize(
         adapter.canVectorize(filter, query.getVirtualColumns(), descending)
         && VirtualColumns.shouldVectorize(query, query.getVirtualColumns(), adapter)
-        && query.getAggregatorSpecs().stream().allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(inspector))
+        && VectorGroupByEngine.canVectorizeAggregators(inspector, query.getAggregatorSpecs())
     );

     final Sequence<Result<TimeseriesResultValue>> result;


@@ -266,7 +266,12 @@ public class VirtualColumns implements Cacheable
   public boolean canVectorize(ColumnInspector columnInspector)
   {
     final ColumnInspector inspector = wrapInspector(columnInspector);
-    return virtualColumns.stream().allMatch(virtualColumn -> virtualColumn.canVectorize(inspector));
+    for (VirtualColumn virtualColumn : virtualColumns) {
+      if (!virtualColumn.canVectorize(inspector)) {
+        return false;
+      }
+    }
+    return true;
   }

   /**


@@ -25,6 +25,7 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Sets;
 import com.google.common.primitives.Doubles;
 import org.apache.druid.collections.bitmap.ImmutableBitmap;
+import org.apache.druid.error.DruidException;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.common.RE;
@@ -915,13 +916,11 @@ public abstract class CompressedNestedDataComplexColumn<TStringDictionary extend
     );
     // we should check this someday soon, but for now just read it to push the buffer position ahead
     int flags = dataBuffer.getInt();
-    Preconditions.checkState(
-        flags == DictionaryEncodedColumnPartSerde.NO_FLAGS,
-        StringUtils.format(
-            "Unrecognized bits set in space reserved for future flags for field column [%s]",
-            field
-        )
-    );
+    if (flags != DictionaryEncodedColumnPartSerde.NO_FLAGS) {
+      throw DruidException.defensive(
+          "Unrecognized bits set in space reserved for future flags for field column [%s]", field
+      );
+    }

     final Supplier<FixedIndexed<Integer>> localDictionarySupplier = FixedIndexed.read(
         dataBuffer,
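
The rewrite above also fixes an eager-evaluation cost: with Preconditions.checkState(condition, message), the StringUtils.format(...) message was built on every read, even when the flags were valid, while the plain if only formats on the failure path. A minimal sketch of the difference, using only standard-library calls rather than the Guava and Druid helpers:

public class LazyMessageSketch
{
  // stand-in for a checkState-style helper: the message argument is
  // evaluated by the caller before the condition is even tested
  static void checkState(boolean condition, Object message)
  {
    if (!condition) {
      throw new IllegalStateException(String.valueOf(message));
    }
  }

  // eager: String.format runs on every call, pass or fail
  static void checkFlagsEager(int flags)
  {
    checkState(flags == 0, String.format("Unrecognized flags [%s]", flags));
  }

  // lazy: the message is only built when the check actually fails
  static void checkFlagsLazy(int flags)
  {
    if (flags != 0) {
      throw new IllegalStateException(String.format("Unrecognized flags [%s]", flags));
    }
  }

  public static void main(String[] args)
  {
    checkFlagsEager(0); // formats a message it never uses
    checkFlagsLazy(0);  // performs no formatting at all
  }
}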


@@ -123,6 +123,6 @@ public abstract class NestedCommonFormatColumnSerializer implements GenericColum
    */
   public static String getInternalFileName(String fileNameBase, String field)
   {
-    return StringUtils.format("%s.%s", fileNameBase, field);
+    return fileNameBase + "." + field;
   }
 }
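
Druid's StringUtils.format delegates to String.format with an English locale, which re-parses the format string and allocates a varargs array on each call; for a fixed two-part join, the concatenation javac emits is cheaper and produces an identical string. A quick sanity check of the equivalence (plain String.format stands in for the Druid helper):

import java.util.Locale;

public class ConcatVsFormat
{
  public static void main(String[] args)
  {
    final String fileNameBase = "column";
    final String field = "path";

    // old: format-string parsing plus an Object[] for the arguments on every call
    final String formatted = String.format(Locale.ENGLISH, "%s.%s", fileNameBase, field);

    // new: compiles to a single StringBuilder / invokedynamic concatenation
    final String concatenated = fileNameBase + "." + field;

    System.out.println(formatted.equals(concatenated)); // true
  }
}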


@@ -27,7 +27,6 @@ import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.java.util.common.FileUtils;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.common.RE;
-import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.io.Closer;
 import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
 import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter;
@@ -182,7 +181,7 @@ public class NestedDataColumnSerializerV4 implements GenericColumnSerializer<Str
     doubleDictionaryWriter.open();

     rawWriter = new CompressedVariableSizedBlobColumnSerializer(
-        getInternalFileName(name, RAW_FILE_NAME),
+        NestedCommonFormatColumnSerializer.getInternalFileName(name, RAW_FILE_NAME),
         segmentWriteOutMedium,
         indexSpec.getJsonCompression() != null ? indexSpec.getJsonCompression() : CompressionStrategy.LZ4
     );
@@ -390,14 +389,9 @@ public class NestedDataColumnSerializerV4 implements GenericColumnSerializer<Str
   private void writeInternal(FileSmoosher smoosher, Serializer serializer, String fileName) throws IOException
   {
-    final String internalName = getInternalFileName(name, fileName);
+    final String internalName = NestedCommonFormatColumnSerializer.getInternalFileName(name, fileName);
     try (SmooshedWriter smooshChannel = smoosher.addWithSmooshedWriter(internalName, serializer.getSerializedSize())) {
       serializer.writeTo(smooshChannel, smoosher);
     }
   }
-
-  public static String getInternalFileName(String fileNameBase, String field)
-  {
-    return StringUtils.format("%s.%s", fileNameBase, field);
-  }
 }