minor adjustments for performance (#16714)

changes:
* stop using string.format in some places, in favor of plain concatenation and lazily-built error messages
* switch some streams to classic loops
Clint Wylie 2024-07-11 16:57:15 -07:00 committed by GitHub
parent 307b8849de
commit dca31d466c
7 changed files with 73 additions and 64 deletions
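
Neither change alters behavior; both shave allocation and call overhead off hot paths. As a rough illustration of the stream-versus-loop pattern the diffs below apply, here is a minimal JMH sketch (hypothetical benchmark, not part of this commit; it assumes the JMH annotations are on the classpath):

import java.util.ArrayList;
import java.util.List;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;

@State(Scope.Benchmark)
public class AllMatchBenchmark
{
  private List<String> values;

  @Setup
  public void setup()
  {
    values = new ArrayList<>();
    for (int i = 0; i < 16; i++) {
      values.add("value-" + i);
    }
  }

  @Benchmark
  public boolean allMatchStream()
  {
    // builds a Stream pipeline on every call
    return values.stream().allMatch(v -> !v.isEmpty());
  }

  @Benchmark
  public boolean allMatchLoop()
  {
    // the pattern this commit switches to: no pipeline allocation, same early exit
    for (String v : values) {
      if (v.isEmpty()) {
        return false;
      }
    }
    return true;
  }
}

Both methods return their result so JMH does not dead-code-eliminate the work being measured.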


@@ -211,24 +211,25 @@ public class GroupByQueryEngine
       final List<DimensionSpec> dimensions
   )
   {
-    return dimensions
-        .stream()
-        .allMatch(
-            dimension -> {
-              if (dimension.mustDecorate()) {
-                // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors.
-                // To be safe, we must return false here.
-                return false;
-              }
+    for (DimensionSpec dimension : dimensions) {
+      if (dimension.mustDecorate()) {
+        // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors.
+        // To be safe, we must return false here.
+        return false;
+      }
 
-              // Now check column capabilities, which must be present and explicitly not multi-valued and not arrays
-              final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension());
-              return dimension.getOutputType().isArray()
-                     || (columnCapabilities != null
-                         && columnCapabilities.hasMultipleValues().isFalse()
-                         && !columnCapabilities.isArray()
-              );
-            });
+      // if dimension spec type is array, skip it since we can handle array or multi-valued
+      if (dimension.getOutputType().isArray()) {
+        continue;
+      }
+
+      // Now check column capabilities, which must be present and explicitly not multi-valued and not arrays
+      final ColumnCapabilities capabilities = inspector.getColumnCapabilities(dimension.getDimension());
+      if (capabilities == null || capabilities.hasMultipleValues().isMaybeTrue() || capabilities.isArray()) {
+        return false;
+      }
+    }
+    return true;
   }
 
   private abstract static class GroupByEngineIterator<KeyType> implements Iterator<ResultRow>, Closeable
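
Note the flipped predicate in the loop: the stream accepted a column when hasMultipleValues().isFalse(), while the loop rejects it when hasMultipleValues().isMaybeTrue(). For a tri-state capability (true/false/unknown) those are complements, so the rewrite preserves behavior. A standalone sketch of that equivalence, using a hypothetical Capable enum that mirrors ColumnCapabilities.Capable:

// Standalone sketch, not Druid code: a hypothetical tri-state enum showing that
// the old accept-condition and the new reject-condition partition the three
// states the same way.
enum Capable
{
  TRUE, FALSE, UNKNOWN;

  boolean isFalse()
  {
    return this == FALSE;
  }

  boolean isMaybeTrue()
  {
    // not definitely false: either known-true or unknown
    return this != FALSE;
  }
}

class CapableEquivalenceCheck
{
  public static void main(String[] args)
  {
    for (Capable c : Capable.values()) {
      // old code accepted when isFalse(); new code rejects when isMaybeTrue().
      // accept == !reject for every state, so the rewrite preserves behavior.
      System.out.println(c + " accept=" + c.isFalse() + " reject=" + c.isMaybeTrue());
    }
  }
}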


@@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.io.Closer;
 import org.apache.druid.java.util.common.parsers.CloseableIterator;
 import org.apache.druid.query.DruidProcessingConfig;
 import org.apache.druid.query.aggregation.AggregatorAdapters;
+import org.apache.druid.query.aggregation.AggregatorFactory;
 import org.apache.druid.query.dimension.DefaultDimensionSpec;
 import org.apache.druid.query.dimension.DimensionSpec;
 import org.apache.druid.query.filter.Filter;
@@ -204,9 +205,7 @@ public class VectorGroupByEngine
     return adapter.canVectorize(filter, query.getVirtualColumns(), false)
            && canVectorizeDimensions(inspector, query.getDimensions())
            && VirtualColumns.shouldVectorize(query, query.getVirtualColumns(), adapter)
-           && query.getAggregatorSpecs()
-                   .stream()
-                   .allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(inspector));
+           && canVectorizeAggregators(inspector, query.getAggregatorSpecs());
   }
 
   private static boolean canVectorizeDimensions(
@@ -214,35 +213,45 @@ public class VectorGroupByEngine
       final List<DimensionSpec> dimensions
   )
   {
-    return dimensions
-        .stream()
-        .allMatch(
-            dimension -> {
-              if (!dimension.canVectorize()) {
-                return false;
-              }
+    for (DimensionSpec dimension : dimensions) {
+      if (!dimension.canVectorize()) {
+        return false;
+      }
 
-              if (dimension.mustDecorate()) {
-                // group by on multi value dimensions are not currently supported
-                // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors.
-                // To be safe, we must return false here.
-                return false;
-              }
+      if (dimension.mustDecorate()) {
+        // group by on multi value dimensions are not currently supported
+        // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors.
+        // To be safe, we must return false here.
+        return false;
+      }
 
-              if (!dimension.getOutputType().isPrimitive()) {
-                // group by on arrays and complex types is not currently supported in the vector processing engine
-                return false;
-              }
+      if (!dimension.getOutputType().isPrimitive()) {
+        // group by on arrays and complex types is not currently supported in the vector processing engine
+        return false;
+      }
 
-              // Now check column capabilities.
-              final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension());
-              // null here currently means the column does not exist, nil columns can be vectorized
-              if (columnCapabilities == null) {
-                return true;
-              }
-              // must be single valued
-              return columnCapabilities.hasMultipleValues().isFalse();
-            });
+      // Now check column capabilities.
+      final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension());
+      if (columnCapabilities != null && columnCapabilities.hasMultipleValues().isMaybeTrue()) {
+        // null here currently means the column does not exist, nil columns can be vectorized
+        // multi-value columns implicit unnest is not currently supported in the vector processing engine
+        return false;
+      }
+    }
+    return true;
+  }
+
+  public static boolean canVectorizeAggregators(
+      final ColumnInspector inspector,
+      final List<AggregatorFactory> aggregatorFactories
+  )
+  {
+    for (AggregatorFactory aggregatorFactory : aggregatorFactories) {
+      if (!aggregatorFactory.canVectorize(inspector)) {
+        return false;
+      }
+    }
+    return true;
   }
 
   @VisibleForTesting
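
Note that canVectorizeAggregators is public rather than private: TimeseriesQueryEngine, in a hunk further below, calls it in place of its own stream-based check, so the loop is written once and shared.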


@@ -37,6 +37,7 @@ import org.apache.druid.query.aggregation.Aggregator;
 import org.apache.druid.query.aggregation.AggregatorAdapters;
 import org.apache.druid.query.aggregation.AggregatorFactory;
 import org.apache.druid.query.filter.Filter;
+import org.apache.druid.query.groupby.epinephelinae.vector.VectorGroupByEngine;
 import org.apache.druid.query.vector.VectorCursorGranularizer;
 import org.apache.druid.segment.ColumnInspector;
 import org.apache.druid.segment.SegmentMissingException;
@@ -103,7 +104,7 @@ public class TimeseriesQueryEngine
     final boolean doVectorize = query.context().getVectorize().shouldVectorize(
         adapter.canVectorize(filter, query.getVirtualColumns(), descending)
         && VirtualColumns.shouldVectorize(query, query.getVirtualColumns(), adapter)
-        && query.getAggregatorSpecs().stream().allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(inspector))
+        && VectorGroupByEngine.canVectorizeAggregators(inspector, query.getAggregatorSpecs())
     );
 
     final Sequence<Result<TimeseriesResultValue>> result;


@@ -266,7 +266,12 @@ public class VirtualColumns implements Cacheable
   public boolean canVectorize(ColumnInspector columnInspector)
   {
     final ColumnInspector inspector = wrapInspector(columnInspector);
-    return virtualColumns.stream().allMatch(virtualColumn -> virtualColumn.canVectorize(inspector));
+    for (VirtualColumn virtualColumn : virtualColumns) {
+      if (!virtualColumn.canVectorize(inspector)) {
+        return false;
+      }
+    }
+    return true;
   }
 
   /**


@@ -25,6 +25,7 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Sets;
 import com.google.common.primitives.Doubles;
 import org.apache.druid.collections.bitmap.ImmutableBitmap;
+import org.apache.druid.error.DruidException;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.common.RE;
@@ -915,13 +916,11 @@ public abstract class CompressedNestedDataComplexColumn<TStringDictionary extend
       );
       // we should check this someday soon, but for now just read it to push the buffer position ahead
       int flags = dataBuffer.getInt();
-      Preconditions.checkState(
-          flags == DictionaryEncodedColumnPartSerde.NO_FLAGS,
-          StringUtils.format(
-              "Unrecognized bits set in space reserved for future flags for field column [%s]",
-              field
-          )
-      );
+      if (flags != DictionaryEncodedColumnPartSerde.NO_FLAGS) {
+        throw DruidException.defensive(
+            "Unrecognized bits set in space reserved for future flags for field column [%s]", field
+        );
+      }
 
       final Supplier<FixedIndexed<Integer>> localDictionarySupplier = FixedIndexed.read(
           dataBuffer,
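
The point of this hunk is when the message gets built: the old call formatted the error string before handing it to Preconditions.checkState, so every successful read of a field column still paid for a format pass. The new code formats only inside the failure branch (DruidException.defensive takes the template and args and formats on throw; Guava's varargs checkState(boolean, String, Object...) overload is the other common fix). A standalone sketch of the difference, with hypothetical names, plain String.format, and Guava assumed on the classpath:

import com.google.common.base.Preconditions;

public class EagerVsLazyMessage
{
  // old shape: String.format runs before checkState, even when flags are valid
  static void checkEager(int flags, String field)
  {
    Preconditions.checkState(
        flags == 0,
        String.format("Unrecognized bits set in flags for field column [%s]", field)
    );
  }

  // new shape: nothing is formatted unless the check actually fails
  static void checkLazy(int flags, String field)
  {
    if (flags != 0) {
      throw new IllegalStateException("Unrecognized bits set in flags for field column [" + field + "]");
    }
  }

  public static void main(String[] args)
  {
    checkEager(0, "x"); // passes, but still paid for a format pass
    checkLazy(0, "x");  // passes and formats nothing
  }
}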


@@ -123,6 +123,6 @@ public abstract class NestedCommonFormatColumnSerializer implements GenericColum
    */
   public static String getInternalFileName(String fileNameBase, String field)
   {
-    return StringUtils.format("%s.%s", fileNameBase, field);
+    return fileNameBase + "." + field;
   }
 }
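
getInternalFileName is called for every internal smoosh file, and StringUtils.format is essentially String.format with a fixed locale, which allocates a java.util.Formatter and parses the "%s.%s" pattern on every call; the concatenation compiles to a single StringBuilder chain (or StringConcatFactory invokedynamic on JDK 9+). A standalone sketch, with String.format standing in for StringUtils.format and made-up argument values:

public class InternalFileName
{
  static String viaFormat(String fileNameBase, String field)
  {
    // allocates a java.util.Formatter and parses "%s.%s" on every call
    return String.format("%s.%s", fileNameBase, field);
  }

  static String viaConcat(String fileNameBase, String field)
  {
    // a single concat: no pattern parsing, no Formatter allocation
    return fileNameBase + "." + field;
  }

  public static void main(String[] args)
  {
    System.out.println(viaFormat("column", "field"));
    System.out.println(viaConcat("column", "field"));
  }
}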


@@ -27,7 +27,6 @@ import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.java.util.common.FileUtils;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.common.RE;
-import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.io.Closer;
 import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
 import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter;
@@ -182,7 +181,7 @@ public class NestedDataColumnSerializerV4 implements GenericColumnSerializer<Str
     doubleDictionaryWriter.open();
 
     rawWriter = new CompressedVariableSizedBlobColumnSerializer(
-        getInternalFileName(name, RAW_FILE_NAME),
+        NestedCommonFormatColumnSerializer.getInternalFileName(name, RAW_FILE_NAME),
         segmentWriteOutMedium,
         indexSpec.getJsonCompression() != null ? indexSpec.getJsonCompression() : CompressionStrategy.LZ4
     );
@@ -390,14 +389,9 @@ public class NestedDataColumnSerializerV4 implements GenericColumnSerializer<Str
   private void writeInternal(FileSmoosher smoosher, Serializer serializer, String fileName) throws IOException
   {
-    final String internalName = getInternalFileName(name, fileName);
+    final String internalName = NestedCommonFormatColumnSerializer.getInternalFileName(name, fileName);
     try (SmooshedWriter smooshChannel = smoosher.addWithSmooshedWriter(internalName, serializer.getSerializedSize())) {
       serializer.writeTo(smooshChannel, smoosher);
     }
   }
-
-  public static String getInternalFileName(String fileNameBase, String field)
-  {
-    return StringUtils.format("%s.%s", fileNameBase, field);
-  }
 }