abstract common base of SQL micro-benchmarks to reduce boilerplate and standardize parameters (#17383)

changes:
* adds `SqlBenchmarkDatasets`, which contains commonly used benchmark data generator schemas
* adds `SqlBaseBenchmark`, which contains common benchmark segment generation methods for any benchmark using `SqlBenchmarkDatasets`
* adds `SqlBaseQueryBenchmark` and `SqlBasePlanBenchmark` for benchmarks measuring query execution and query planning respectively
* migrates all existing SQL JMH benchmarks to extend `SqlBaseQueryBenchmark` (as sketched below), dramatically reducing the boilerplate needed to create benchmarks and allowing the use of multiple datasources within a single benchmark file
* adjusts the data generators to accept an `ObjectMapper` so that the same mapper can be used for both benchmark queries and segment generation, avoiding the need to register modules with both mappers
* adds `SqlProjectionsBenchmark` and `SqlComplexMetricsColumnsBenchmark` for measuring projections and complex metric column compression respectively
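
For illustration, a minimal sketch of what a migrated benchmark looks like against the new base class (the class name `SqlExampleBenchmark` and its query choice are hypothetical; `SqlBaseQueryBenchmark`, `SqlBenchmarkDatasets.BASIC`, `getQuery()`, and `getDatasources()` are part of this change):

package org.apache.druid.benchmark.query;

import com.google.common.collect.ImmutableList;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Warmup;
import java.util.List;

// hypothetical example benchmark: subclasses only supply the SQL and the datasources;
// the @Benchmark method, segment generation, and standardized @Param fields are inherited
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
public class SqlExampleBenchmark extends SqlBaseQueryBenchmark
{
  @Override
  public String getQuery()
  {
    // one of the existing benchmark queries against the shared 'basic' dataset
    return "SELECT dimZipf, COUNT(*) FROM druid.basic GROUP BY 1";
  }

  @Override
  public List<String> getDatasources()
  {
    return ImmutableList.of(SqlBenchmarkDatasets.BASIC);
  }
}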
Clint Wylie 2024-10-22 19:37:17 -07:00 committed by GitHub
parent 9dfb378711
commit 1157ecdec3
16 changed files with 1770 additions and 1757 deletions

View File

@@ -20,7 +20,7 @@
package org.apache.druid.benchmark.lookup;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.benchmark.query.SqlBenchmark;
import org.apache.druid.benchmark.query.SqlBaseBenchmark;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
@@ -54,6 +54,7 @@ import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.concurrent.TimeUnit;
/**
@@ -111,10 +112,11 @@ public class SqlReverseLookupBenchmark
final QueryableIndex index =
segmentGenerator.generate(dataSegment, schemaInfo, IndexSpec.DEFAULT, Granularities.NONE, 1);
final Pair<PlannerFactory, SqlEngine> sqlSystem = SqlBenchmark.createSqlSystem(
final Pair<PlannerFactory, SqlEngine> sqlSystem = SqlBaseBenchmark.createSqlSystem(
ImmutableMap.of(dataSegment, index),
Collections.emptyMap(),
ImmutableMap.of("benchmark-lookup", lookup),
null,
SqlBaseBenchmark.BenchmarkStorage.MMAP,
closer
);

View File

@@ -0,0 +1,559 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.benchmark.query;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.google.inject.multibindings.MapBinder;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.frame.FrameType;
import org.apache.druid.frame.read.FrameReader;
import org.apache.druid.frame.segment.FrameSegment;
import org.apache.druid.frame.testutil.FrameTestUtil;
import org.apache.druid.guice.ExpressionModule;
import org.apache.druid.guice.LazySingleton;
import org.apache.druid.guice.SegmentWranglerModule;
import org.apache.druid.guice.StartupInjectorBuilder;
import org.apache.druid.guice.annotations.Json;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
import org.apache.druid.query.aggregation.datasketches.hll.HllSketchModule;
import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctSqlAggregator;
import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctUtf8SqlAggregator;
import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchEstimateOperatorConversion;
import org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchModule;
import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchApproxQuantileSqlAggregator;
import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchObjectSqlAggregator;
import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchQuantileOperatorConversion;
import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchQuantilesOperatorConversion;
import org.apache.druid.query.aggregation.datasketches.theta.SketchModule;
import org.apache.druid.query.aggregation.datasketches.theta.sql.ThetaSketchApproxCountDistinctSqlAggregator;
import org.apache.druid.query.aggregation.datasketches.theta.sql.ThetaSketchEstimateOperatorConversion;
import org.apache.druid.query.aggregation.datasketches.tuple.ArrayOfDoublesSketchModule;
import org.apache.druid.query.lookup.LookupExtractor;
import org.apache.druid.segment.IncrementalIndexSegment;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.PhysicalSegmentInspector;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.QueryableIndexCursorFactory;
import org.apache.druid.segment.QueryableIndexPhysicalSegmentInspector;
import org.apache.druid.segment.QueryableIndexSegment;
import org.apache.druid.segment.column.StringEncodingStrategy;
import org.apache.druid.segment.data.CompressionStrategy;
import org.apache.druid.segment.data.FrontCodedIndexed;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest;
import org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator;
import org.apache.druid.sql.calcite.aggregation.SqlAggregationModule;
import org.apache.druid.sql.calcite.aggregation.SqlAggregator;
import org.apache.druid.sql.calcite.aggregation.builtin.CountSqlAggregator;
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.QueryLookupOperatorConversion;
import org.apache.druid.sql.calcite.planner.CalciteRulesManager;
import org.apache.druid.sql.calcite.planner.CatalogResolver;
import org.apache.druid.sql.calcite.planner.DruidOperatorTable;
import org.apache.druid.sql.calcite.planner.DruidPlanner;
import org.apache.druid.sql.calcite.planner.PlannerConfig;
import org.apache.druid.sql.calcite.planner.PlannerFactory;
import org.apache.druid.sql.calcite.planner.PlannerResult;
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.calcite.util.LookylooModule;
import org.apache.druid.sql.calcite.util.QueryFrameworkUtils;
import org.apache.druid.sql.calcite.util.testoperator.CalciteTestOperatorModule;
import org.apache.druid.sql.hook.DruidHookDispatcher;
import org.apache.druid.timeline.DataSegment;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@State(Scope.Benchmark)
public class SqlBaseBenchmark
{
private static final Logger log = new Logger(SqlBaseBenchmark.class);
private static final String STORAGE_MMAP = "MMAP";
private static final String STORAGE_INCREMENTAL = "INCREMENTAL";
private static final String STORAGE_FRAME_ROW = "FRAME_ROW";
private static final String STORAGE_FRAME_COLUMNAR = "FRAME_COLUMNAR";
static {
NullHandling.initializeForTests();
ExpressionProcessing.initializeForTests();
HllSketchModule.registerSerde();
SketchModule.registerSerde();
DoublesSketchModule.registerSerde();
CalciteTests.getJsonMapper()
.registerModules(new HllSketchModule().getJacksonModules())
.registerModules(new SketchModule().getJacksonModules())
.registerModules(new DoublesSketchModule().getJacksonModules())
.registerModules(new ArrayOfDoublesSketchModule().getJacksonModules());
}
public enum BenchmarkStorage
{
MMAP,
INCREMENTAL,
FRAME_COLUMNAR,
FRAME_ROW
}
public enum BenchmarkStringEncodingStrategy
{
UTF8,
FRONT_CODED_DEFAULT_V1,
FRONT_CODED_16_V1
}
@Param({"1500000"})
protected int rowsPerSegment;
@Param({
"false",
"force"
})
protected String vectorize;
@Param({
"UTF8",
"FRONT_CODED_DEFAULT_V1",
"FRONT_CODED_16_V1"
})
protected BenchmarkStringEncodingStrategy stringEncoding;
@Param({
"none",
"lz4"
})
protected String complexCompression;
@Param({
"explicit",
"auto"
})
protected String schemaType;
// Can be STORAGE_MMAP, STORAGE_INCREMENTAL, STORAGE_FRAME_ROW, or STORAGE_FRAME_COLUMNAR
@Param({
STORAGE_MMAP,
STORAGE_INCREMENTAL,
STORAGE_FRAME_ROW,
STORAGE_FRAME_COLUMNAR
})
protected BenchmarkStorage storageType;
protected SqlEngine engine;
@Nullable
protected PlannerFactory plannerFactory;
private final Closer closer = Closer.create();
protected QueryContexts.Vectorize vectorizeContext;
public String getQuery()
{
throw new UnsupportedOperationException("getQuery not implemented");
}
public List<String> getDatasources()
{
throw new UnsupportedOperationException("getDatasources not implemented");
}
protected Map<String, Object> getContext()
{
final Map<String, Object> context = ImmutableMap.of(
QueryContexts.VECTORIZE_KEY, vectorize,
QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize
);
return context;
}
protected IndexSpec getIndexSpec()
{
return IndexSpec.builder()
.withStringDictionaryEncoding(getStringEncodingStrategy())
.withComplexMetricCompression(
CompressionStrategy.valueOf(StringUtils.toUpperCase(complexCompression))
)
.build();
}
@Setup(Level.Trial)
public void setup() throws JsonProcessingException
{
vectorizeContext = QueryContexts.Vectorize.fromString(vectorize);
checkIncompatibleParameters();
Map<DataSegment, IncrementalIndex> realtimeSegments = new HashMap<>();
Map<DataSegment, QueryableIndex> segments = new HashMap<>();
for (String dataSource : getDatasources()) {
final SqlBenchmarkDatasets.BenchmarkSchema schema;
if ("auto".equals(schemaType)) {
schema = SqlBenchmarkDatasets.getSchema(dataSource).asAutoDimensions();
} else {
schema = SqlBenchmarkDatasets.getSchema(dataSource);
}
for (DataSegment dataSegment : schema.getDataSegments()) {
final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
log.info(
"Starting benchmark setup using cacheDir[%s], rows[%,d].",
segmentGenerator.getCacheDir(),
rowsPerSegment
);
if (BenchmarkStorage.INCREMENTAL == storageType) {
final IncrementalIndex index = segmentGenerator.generateIncrementalIndex(
dataSegment,
schema.getGeneratorSchemaInfo(),
schema.getDimensionsSpec(),
schema.getTransformSpec(),
schema.getAggregators(),
getIndexSpec(),
schema.getQueryGranularity(),
schema.getProjections(),
rowsPerSegment,
CalciteTests.getJsonMapper()
);
log.info(
"Segment metadata: %s",
CalciteTests.getJsonMapper().writerWithDefaultPrettyPrinter().writeValueAsString(index.getMetadata())
);
realtimeSegments.put(dataSegment, index);
} else {
final QueryableIndex index = segmentGenerator.generate(
dataSegment,
schema.getGeneratorSchemaInfo(),
schema.getDimensionsSpec(),
schema.getTransformSpec(),
getIndexSpec(),
schema.getQueryGranularity(),
schema.getProjections(),
rowsPerSegment,
CalciteTests.getJsonMapper()
);
log.info(
"Segment metadata: %s",
CalciteTests.getJsonMapper().writerWithDefaultPrettyPrinter().writeValueAsString(index.getMetadata())
);
segments.put(dataSegment, index);
}
}
}
final Pair<PlannerFactory, SqlEngine> sqlSystem = createSqlSystem(
segments,
realtimeSegments,
Collections.emptyMap(),
storageType,
closer
);
plannerFactory = sqlSystem.lhs;
engine = sqlSystem.rhs;
final ObjectMapper jsonMapper = CalciteTests.getJsonMapper();
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(
engine,
"EXPLAIN PLAN FOR " + getQuery(),
ImmutableMap.<String, Object>builder()
.putAll(getContext())
.put(
"useNativeQueryExplain",
true
)
.build()
)) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Object[] planResult = resultSequence.toList().get(0);
log.info("Native query plan:\n" +
jsonMapper.writerWithDefaultPrettyPrinter()
.writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
);
}
catch (JsonProcessingException ex) {
log.warn(ex, "explain failed");
}
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, getQuery(), getContext())) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final int rowCount = resultSequence.toList().size();
log.info("Total result row count:" + rowCount);
}
catch (Throwable ex) {
log.warn(ex, "failed to count rows");
}
if (vectorizeContext.shouldVectorize(true)) {
try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
engine,
plannerFactory,
getQuery()
);
log.info("non-vectorized and vectorized results match");
}
catch (Throwable ex) {
log.warn(ex, "non-vectorized and vectorized results do not match");
}
}
}
private void checkIncompatibleParameters()
{
// if running with fork 0, don't use these combinations, since System.exit will kill the whole run
if (stringEncoding != BenchmarkStringEncodingStrategy.UTF8 && storageType != BenchmarkStorage.MMAP) {
System.exit(0);
}
// complex compression only applies to mmap segments, don't bother otherwise
if (!"none".equals(complexCompression) && storageType != BenchmarkStorage.MMAP) {
System.exit(0);
}
// vectorize only works for mmap and frame columnar segments, bail out otherwise
if (vectorizeContext.shouldVectorize(true) && !(storageType == BenchmarkStorage.MMAP || storageType == BenchmarkStorage.FRAME_COLUMNAR)) {
System.exit(0);
}
}
private StringEncodingStrategy getStringEncodingStrategy()
{
if (stringEncoding == BenchmarkStringEncodingStrategy.FRONT_CODED_DEFAULT_V1) {
return new StringEncodingStrategy.FrontCoded(null, FrontCodedIndexed.V1);
} else if (stringEncoding == BenchmarkStringEncodingStrategy.FRONT_CODED_16_V1) {
return new StringEncodingStrategy.FrontCoded(16, FrontCodedIndexed.V1);
} else {
return new StringEncodingStrategy.Utf8();
}
}
public static Pair<PlannerFactory, SqlEngine> createSqlSystem(
final Map<DataSegment, QueryableIndex> segmentMap,
final Map<DataSegment, IncrementalIndex> realtimeSegmentsMap,
final Map<String, LookupExtractor> lookupMap,
@Nullable final BenchmarkStorage storageType,
final Closer closer
)
{
final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(closer);
final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate);
final PlannerConfig plannerConfig = new PlannerConfig();
for (final Map.Entry<DataSegment, QueryableIndex> segmentEntry : segmentMap.entrySet()) {
addSegmentToWalker(walker, segmentEntry.getKey(), segmentEntry.getValue(), storageType);
}
for (final Map.Entry<DataSegment, IncrementalIndex> segmentEntry : realtimeSegmentsMap.entrySet()) {
walker.add(
segmentEntry.getKey(),
new IncrementalIndexSegment(segmentEntry.getValue(), segmentEntry.getKey().getId())
);
}
// Child injector that adds additional lookups.
final Injector injector = new StartupInjectorBuilder()
.withEmptyProperties()
.add(
new ExpressionModule(),
new SegmentWranglerModule(),
new LookylooModule(),
new SqlAggregationModule(),
new CalciteTestOperatorModule(),
binder -> {
for (Map.Entry<String, LookupExtractor> entry : lookupMap.entrySet()) {
MapBinder.newMapBinder(binder, String.class, LookupExtractor.class)
.addBinding(entry.getKey())
.toProvider(entry::getValue)
.in(LazySingleton.class);
}
},
new HllSketchModule(),
new SketchModule(),
new DoublesSketchModule(),
binder -> {
}
)
.build();
ObjectMapper injected = injector.getInstance(Key.get(ObjectMapper.class, Json.class));
injected.registerModules(new HllSketchModule().getJacksonModules());
final DruidSchemaCatalog rootSchema =
QueryFrameworkUtils.createMockRootSchema(
injector,
conglomerate,
walker,
plannerConfig,
AuthTestUtils.TEST_AUTHORIZER_MAPPER
);
final SqlEngine engine = CalciteTests.createMockSqlEngine(walker, conglomerate);
final PlannerFactory plannerFactory = new PlannerFactory(
rootSchema,
createOperatorTable(injector),
injector.getInstance(ExprMacroTable.class),
plannerConfig,
AuthTestUtils.TEST_AUTHORIZER_MAPPER,
injector.getInstance(Key.get(ObjectMapper.class, Json.class)),
CalciteTests.DRUID_SCHEMA_NAME,
new CalciteRulesManager(ImmutableSet.of()),
new JoinableFactoryWrapper(QueryFrameworkUtils.createDefaultJoinableFactory(injector)),
CatalogResolver.NULL_RESOLVER,
new AuthConfig(),
new DruidHookDispatcher()
);
return Pair.of(plannerFactory, engine);
}
private static void addSegmentToWalker(
final SpecificSegmentsQuerySegmentWalker walker,
final DataSegment descriptor,
final QueryableIndex index,
@Nullable final BenchmarkStorage storageType
)
{
if (storageType == null || BenchmarkStorage.MMAP == storageType) {
walker.add(descriptor, new QueryableIndexSegment(index, descriptor.getId()));
} else if (BenchmarkStorage.FRAME_ROW == storageType) {
QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
walker.add(
descriptor,
new FrameSegment(
FrameTestUtil.cursorFactoryToFrame(cursorFactory, FrameType.ROW_BASED),
FrameReader.create(cursorFactory.getRowSignature()),
descriptor.getId()
)
{
@Nullable
@Override
public <T> T as(@Nonnull Class<T> clazz)
{
// computed sql schema uses segment metadata, which relies on physical inspector, use the underlying index
if (clazz.equals(PhysicalSegmentInspector.class)) {
return (T) new QueryableIndexPhysicalSegmentInspector(index);
}
return super.as(clazz);
}
}
);
} else if (BenchmarkStorage.FRAME_COLUMNAR == storageType) {
QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
walker.add(
descriptor,
new FrameSegment(
FrameTestUtil.cursorFactoryToFrame(cursorFactory, FrameType.COLUMNAR),
FrameReader.create(cursorFactory.getRowSignature()),
descriptor.getId()
)
{
@Nullable
@Override
public <T> T as(@Nonnull Class<T> clazz)
{
// computed sql schema uses segment metadata, which relies on physical inspector, use the underlying index
if (clazz.equals(PhysicalSegmentInspector.class)) {
return (T) new QueryableIndexPhysicalSegmentInspector(index);
}
return super.as(clazz);
}
}
);
} else {
throw new IAE("Invalid storageType[%s]", storageType);
}
}
private static DruidOperatorTable createOperatorTable(final Injector injector)
{
try {
final Set<SqlOperatorConversion> operators = new HashSet<>();
operators.add(injector.getInstance(QueryLookupOperatorConversion.class));
operators.addAll(
ImmutableList.of(
new HllSketchEstimateOperatorConversion(),
new ThetaSketchEstimateOperatorConversion(),
new DoublesSketchQuantileOperatorConversion(),
new DoublesSketchQuantilesOperatorConversion()
)
);
final ApproxCountDistinctSqlAggregator countDistinctSqlAggregator =
new ApproxCountDistinctSqlAggregator(new HllSketchApproxCountDistinctSqlAggregator());
final Set<SqlAggregator> aggregators = new HashSet<>(
ImmutableList.of(
new DoublesSketchApproxQuantileSqlAggregator(),
new DoublesSketchObjectSqlAggregator(),
new HllSketchApproxCountDistinctSqlAggregator(),
new HllSketchApproxCountDistinctUtf8SqlAggregator(),
new ThetaSketchApproxCountDistinctSqlAggregator(),
new CountSqlAggregator(countDistinctSqlAggregator),
countDistinctSqlAggregator
)
);
return new DruidOperatorTable(aggregators, operators);
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
@TearDown(Level.Trial)
public void tearDown() throws Exception
{
closer.close();
}
}
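
As a usage note, the standardized `@Param` fields above (`rowsPerSegment`, `vectorize`, `stringEncoding`, `complexCompression`, `schemaType`, `storageType`) apply to every subclass and can be pinned to a single combination through the standard JMH runner API; a minimal sketch, where the launcher class itself is hypothetical and only the parameter names come from `SqlBaseBenchmark`:

package org.apache.druid.benchmark.query;

import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

// hypothetical launcher: runs SqlBenchmark with one fixed combination of the shared parameters
public class SqlBenchmarkLauncher
{
  public static void main(String[] args) throws RunnerException
  {
    final Options opts = new OptionsBuilder()
        .include(SqlBenchmark.class.getSimpleName())
        .param("storageType", "MMAP")
        .param("stringEncoding", "UTF8")
        .param("complexCompression", "none")
        .param("vectorize", "force")
        .build();
    new Runner(opts).run();
  }
}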

View File

@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.benchmark.query;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.sql.calcite.planner.DruidPlanner;
import org.apache.druid.sql.calcite.planner.PlannerResult;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.infra.Blackhole;
import java.util.Map;
import java.util.concurrent.TimeUnit;
public class SqlBasePlanBenchmark extends SqlBaseBenchmark
{
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void planSql(Blackhole blackhole)
{
final Map<String, Object> context = ImmutableMap.of(
QueryContexts.VECTORIZE_KEY, vectorize,
QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize
);
final String sql = getQuery();
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) {
final PlannerResult plannerResult = planner.plan();
blackhole.consume(plannerResult);
}
}
}

View File

@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.benchmark.query;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.sql.calcite.planner.DruidPlanner;
import org.apache.druid.sql.calcite.planner.PlannerResult;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.infra.Blackhole;
import java.util.Map;
import java.util.concurrent.TimeUnit;
public class SqlBaseQueryBenchmark extends SqlBaseBenchmark
{
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void querySql(Blackhole blackhole)
{
final Map<String, Object> context = getContext();
final String sql = getQuery();
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in);
blackhole.consume(lastRow);
}
}
}

View File

@@ -19,101 +19,15 @@
package org.apache.druid.benchmark.query;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.google.inject.multibindings.MapBinder;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.frame.FrameType;
import org.apache.druid.frame.testutil.FrameTestUtil;
import org.apache.druid.guice.ExpressionModule;
import org.apache.druid.guice.LazySingleton;
import org.apache.druid.guice.SegmentWranglerModule;
import org.apache.druid.guice.StartupInjectorBuilder;
import org.apache.druid.guice.annotations.Json;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Yielder;
import org.apache.druid.java.util.common.guava.Yielders;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctSqlAggregator;
import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctUtf8SqlAggregator;
import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchApproxQuantileSqlAggregator;
import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchObjectSqlAggregator;
import org.apache.druid.query.aggregation.datasketches.theta.sql.ThetaSketchApproxCountDistinctSqlAggregator;
import org.apache.druid.query.lookup.LookupExtractor;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.QueryableIndexCursorFactory;
import org.apache.druid.segment.QueryableIndexSegment;
import org.apache.druid.segment.column.StringEncodingStrategy;
import org.apache.druid.segment.data.FrontCodedIndexed;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator;
import org.apache.druid.sql.calcite.aggregation.SqlAggregationModule;
import org.apache.druid.sql.calcite.aggregation.SqlAggregator;
import org.apache.druid.sql.calcite.aggregation.builtin.CountSqlAggregator;
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.QueryLookupOperatorConversion;
import org.apache.druid.sql.calcite.planner.CalciteRulesManager;
import org.apache.druid.sql.calcite.planner.CatalogResolver;
import org.apache.druid.sql.calcite.planner.DruidOperatorTable;
import org.apache.druid.sql.calcite.planner.DruidPlanner;
import org.apache.druid.sql.calcite.planner.PlannerConfig;
import org.apache.druid.sql.calcite.planner.PlannerFactory;
import org.apache.druid.sql.calcite.planner.PlannerResult;
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.calcite.util.LookylooModule;
import org.apache.druid.sql.calcite.util.QueryFrameworkUtils;
import org.apache.druid.sql.calcite.util.testoperator.CalciteTestOperatorModule;
import org.apache.druid.sql.hook.DruidHookDispatcher;
import org.apache.druid.timeline.DataSegment;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
/**
* Benchmark that tests various SQL queries.
@@ -122,40 +36,30 @@ import java.util.stream.Collectors;
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
public class SqlBenchmark
public class SqlBenchmark extends SqlBaseQueryBenchmark
{
static {
NullHandling.initializeForTests();
}
private static final Logger log = new Logger(SqlBenchmark.class);
private static final String STORAGE_MMAP = "mmap";
private static final String STORAGE_FRAME_ROW = "frame-row";
private static final String STORAGE_FRAME_COLUMNAR = "frame-columnar";
private static final List<String> QUERIES = ImmutableList.of(
static final List<String> QUERIES = ImmutableList.of(
// 0, 1, 2, 3: Timeseries, unfiltered
"SELECT COUNT(*) FROM foo",
"SELECT COUNT(DISTINCT hyper) FROM foo",
"SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo",
"SELECT FLOOR(__time TO MINUTE), SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo GROUP BY 1",
"SELECT COUNT(*) FROM druid.basic",
"SELECT APPROX_COUNT_DISTINCT_BUILTIN(hyper) FROM druid.basic",
"SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM druid.basic",
"SELECT FLOOR(__time TO MINUTE), SUM(sumLongSequential), SUM(sumFloatNormal) FROM druid.basic GROUP BY 1",
// 4: Timeseries, low selectivity filter (90% of rows match)
"SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo WHERE dimSequential NOT LIKE '%3'",
"SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM druid.basic WHERE dimSequential NOT LIKE '%3'",
// 5: Timeseries, high selectivity filter (0.1% of rows match)
"SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo WHERE dimSequential = '311'",
"SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM druid.basic WHERE dimSequential = '311'",
// 6: Timeseries, mixing low selectivity index-capable filter (90% of rows match) + cursor filter
"SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo\n"
"SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM druid.basic\n"
+ "WHERE dimSequential NOT LIKE '%3' AND maxLongUniform > 10",
// 7: Timeseries, low selectivity toplevel filter (90%), high selectivity filtered aggregator (0.1%)
"SELECT\n"
+ " SUM(sumLongSequential) FILTER(WHERE dimSequential = '311'),\n"
+ " SUM(sumFloatNormal)\n"
+ "FROM foo\n"
+ "FROM druid.basic\n"
+ "WHERE dimSequential NOT LIKE '%3'",
// 8: Timeseries, no toplevel filter, various filtered aggregators with clauses repeated.
@@ -175,7 +79,7 @@ public class SqlBenchmark
+ " COUNT(*) FILTER(WHERE dimSequential LIKE '%3'),\n"
+ " COUNT(*) FILTER(WHERE dimSequential NOT LIKE '%3'),\n"
+ " COUNT(*)\n"
+ "FROM foo",
+ "FROM druid.basic",
// 9: Timeseries, toplevel time filter, time-comparison filtered aggregators
"SELECT\n"
@@ -183,233 +87,233 @@ public class SqlBenchmark
+ " FILTER(WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' AND __time < TIMESTAMP '2000-01-01 12:00:00'),\n"
+ " SUM(sumLongSequential)\n"
+ " FILTER(WHERE __time >= TIMESTAMP '2000-01-01 12:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00')\n"
+ "FROM foo\n"
+ "FROM druid.basic\n"
+ "WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00'",
// 10, 11: GroupBy two strings, unfiltered, unordered
"SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM foo GROUP BY 1, 2",
"SELECT dimSequential, dimZipf, SUM(sumLongSequential), COUNT(*) FROM foo GROUP BY 1, 2",
"SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM druid.basic GROUP BY 1, 2",
"SELECT dimSequential, dimZipf, SUM(sumLongSequential), COUNT(*) FROM druid.basic GROUP BY 1, 2",
// 12, 13, 14: GroupBy one string, unfiltered, various aggregator configurations
"SELECT dimZipf FROM foo GROUP BY 1",
"SELECT dimZipf, COUNT(*) FROM foo GROUP BY 1 ORDER BY COUNT(*) DESC",
"SELECT dimZipf, SUM(sumLongSequential), COUNT(*) FROM foo GROUP BY 1 ORDER BY COUNT(*) DESC",
"SELECT dimZipf FROM druid.basic GROUP BY 1",
"SELECT dimZipf, COUNT(*) FROM druid.basic GROUP BY 1 ORDER BY COUNT(*) DESC",
"SELECT dimZipf, SUM(sumLongSequential), COUNT(*) FROM druid.basic GROUP BY 1 ORDER BY COUNT(*) DESC",
// 15, 16: GroupBy long, unfiltered, unordered; with and without aggregators
"SELECT maxLongUniform FROM foo GROUP BY 1",
"SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM foo GROUP BY 1",
"SELECT maxLongUniform FROM druid.basic GROUP BY 1",
"SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM druid.basic GROUP BY 1",
// 17, 18: GroupBy long, filter by long, unordered; with and without aggregators
"SELECT maxLongUniform FROM foo WHERE maxLongUniform > 10 GROUP BY 1",
"SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM foo WHERE maxLongUniform > 10 GROUP BY 1",
"SELECT maxLongUniform FROM druid.basic WHERE maxLongUniform > 10 GROUP BY 1",
"SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM druid.basic WHERE maxLongUniform > 10 GROUP BY 1",
// 19: ultra mega union matrix
"WITH matrix (dimZipf, dimSequential) AS (\n"
+ " (\n"
+ " SELECT '100', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '100'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '110', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '110'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '120', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '120'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '130', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '130'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '140', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '140'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '150', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '150'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '160', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '160'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '170', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '170'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '180', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '180'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '190', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '190'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '200', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '200'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '210', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '210'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '220', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '220'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '230', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '230'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '240', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '240'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '250', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '250'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '260', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '260'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '270', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '270'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '280', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '280'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '290', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '290'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '300', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '300'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '310', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '310'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '320', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '320'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '330', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '330'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '340', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '340'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '350', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '350'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '360', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '360'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '370', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '370'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '380', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '380'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT 'other', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n"
+ " WHERE\n"
+ " dimZipf NOT IN (\n"
+ " '100', '110', '120', '130', '140', '150', '160', '170', '180', '190',\n"
@@ -423,65 +327,45 @@ public class SqlBenchmark
// 20: GroupBy, doubles sketches
"SELECT dimZipf, APPROX_QUANTILE_DS(sumFloatNormal, 0.5), DS_QUANTILES_SKETCH(maxLongUniform) "
+ "FROM foo "
+ "FROM druid.basic "
+ "GROUP BY 1",
// 21, 22: stringy stuff
"SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM foo WHERE dimUniform NOT LIKE '%3' GROUP BY 1, 2",
"SELECT dimZipf, SUM(sumLongSequential) FROM foo WHERE dimSequential = '311' GROUP BY 1 ORDER BY 1",
"SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM druid.basic WHERE dimUniform NOT LIKE '%3' GROUP BY 1, 2",
"SELECT dimZipf, SUM(sumLongSequential) FROM druid.basic WHERE dimSequential = '311' GROUP BY 1 ORDER BY 1",
// 23: full scan
"SELECT * FROM foo",
"SELECT * FROM foo WHERE dimSequential IN ('1', '2', '3', '4', '5', '10', '11', '20', '21', '23', '40', '50', '64', '70', '100')",
"SELECT * FROM foo WHERE dimSequential > '10' AND dimSequential < '8500'",
"SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM foo WHERE dimSequential IN ('1', '2', '3', '4', '5', '10', '11', '20', '21', '23', '40', '50', '64', '70', '100') GROUP BY 1, 2",
"SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM foo WHERE dimSequential > '10' AND dimSequential < '8500' GROUP BY 1, 2",
"SELECT * FROM druid.basic",
"SELECT * FROM druid.basic WHERE dimSequential IN ('1', '2', '3', '4', '5', '10', '11', '20', '21', '23', '40', '50', '64', '70', '100')",
"SELECT * FROM druid.basic WHERE dimSequential > '10' AND dimSequential < '8500'",
"SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM druid.basic WHERE dimSequential IN ('1', '2', '3', '4', '5', '10', '11', '20', '21', '23', '40', '50', '64', '70', '100') GROUP BY 1, 2",
"SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM druid.basic WHERE dimSequential > '10' AND dimSequential < '8500' GROUP BY 1, 2",
// 28, 29, 30, 31: Approximate count distinct of strings
"SELECT APPROX_COUNT_DISTINCT_BUILTIN(dimZipf) FROM foo",
"SELECT APPROX_COUNT_DISTINCT_DS_HLL(dimZipf) FROM foo",
"SELECT APPROX_COUNT_DISTINCT_DS_HLL_UTF8(dimZipf) FROM foo",
"SELECT APPROX_COUNT_DISTINCT_DS_THETA(dimZipf) FROM foo",
"SELECT APPROX_COUNT_DISTINCT_BUILTIN(dimZipf) FROM druid.basic",
"SELECT APPROX_COUNT_DISTINCT_DS_HLL(dimZipf) FROM druid.basic",
"SELECT APPROX_COUNT_DISTINCT_DS_HLL_UTF8(dimZipf) FROM druid.basic",
"SELECT APPROX_COUNT_DISTINCT_DS_THETA(dimZipf) FROM druid.basic",
// 32: LATEST aggregator long
"SELECT LATEST(long1) FROM foo",
"SELECT LATEST(long1) FROM druid.expressions",
// 33: LATEST aggregator double
"SELECT LATEST(double4) FROM foo",
"SELECT LATEST(double4) FROM druid.expressions",
// 34: LATEST aggregator double
"SELECT LATEST(float3) FROM foo",
"SELECT LATEST(float3) FROM druid.expressions",
// 35: LATEST aggregator double
"SELECT LATEST(float3), LATEST(long1), LATEST(double4) FROM foo",
"SELECT LATEST(float3), LATEST(long1), LATEST(double4) FROM druid.expressions",
// 36,37: filter numeric nulls
"SELECT SUM(long5) FROM foo WHERE long5 IS NOT NULL",
"SELECT string2, SUM(long5) FROM foo WHERE long5 IS NOT NULL GROUP BY 1",
"SELECT SUM(long5) FROM druid.expressions WHERE long5 IS NOT NULL",
"SELECT string2, SUM(long5) FROM druid.expressions WHERE long5 IS NOT NULL GROUP BY 1",
// 38: EARLIEST aggregator long
"SELECT EARLIEST(long1) FROM foo",
"SELECT EARLIEST(long1) FROM druid.expressions",
// 39: EARLIEST aggregator double
"SELECT EARLIEST(double4) FROM foo",
"SELECT EARLIEST(double4) FROM druid.expressions",
// 40: EARLIEST aggregator float
"SELECT EARLIEST(float3) FROM foo",
"SELECT EARLIEST(float3) FROM druid.expressions",
// 41: nested OR filter
"SELECT dimSequential, COUNT(*) from foo WHERE dimSequential = '1' AND (dimMultivalEnumerated IN ('Hello', 'World', 'Foo', 'Bar', 'Baz') OR sumLongSequential = 1) GROUP BY 1"
"SELECT dimSequential, COUNT(*) from druid.basic WHERE dimSequential = '1' AND (dimMultivalEnumerated IN ('Hello', 'World', 'druid.basic', 'Bar', 'Baz') OR sumLongSequential = 1) GROUP BY 1"
);
@Param({"5000000"})
private int rowsPerSegment;
// Can be "false", "true", or "force"
@Param({"false", "force"})
private String vectorize;
// Can be "none" or "front-coded-N"
@Param({
"none",
"front-coded-4"
})
private String stringEncoding;
@Param({
"explicit",
"auto"
})
private String schema;
@Param({
"0",
"1",
@@ -528,254 +412,15 @@ public class SqlBenchmark
})
private String query;
// Can be STORAGE_MMAP, STORAGE_FRAME_ROW, or STORAGE_FRAME_COLUMNAR
@Param({STORAGE_MMAP})
private String storageType;
private SqlEngine engine;
@Nullable
private PlannerFactory plannerFactory;
private final Closer closer = Closer.create();
@Setup(Level.Trial)
public void setup()
@Override
public String getQuery()
{
final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
final DataSegment dataSegment = schemaInfo.makeSegmentDescriptor("foo");
final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
log.info("Starting benchmark setup using cacheDir[%s], rows[%,d].", segmentGenerator.getCacheDir(), rowsPerSegment);
final QueryableIndex index;
if ("auto".equals(schema)) {
List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
.getDimensions()
.stream()
.map(x -> new AutoTypeColumnSchema(x.getName(), null))
.collect(Collectors.toList());
index = segmentGenerator.generate(
dataSegment,
schemaInfo,
DimensionsSpec.builder().setDimensions(columnSchemas).build(),
TransformSpec.NONE,
IndexSpec.builder().withStringDictionaryEncoding(getStringEncodingStrategy()).build(),
Granularities.NONE,
rowsPerSegment
);
} else {
index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment);
}
final Pair<PlannerFactory, SqlEngine> sqlSystem = createSqlSystem(
ImmutableMap.of(dataSegment, index),
Collections.emptyMap(),
null,
closer
);
plannerFactory = sqlSystem.lhs;
engine = sqlSystem.rhs;
final String sql = QUERIES.get(Integer.parseInt(query));
final ObjectMapper jsonMapper = CalciteTests.getJsonMapper();
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Object[] planResult = resultSequence.toList().get(0);
log.info("Native query plan:\n" +
jsonMapper.writerWithDefaultPrettyPrinter()
.writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
);
}
catch (JsonProcessingException ignored) {
}
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Yielder<Object[]> yielder = Yielders.each(resultSequence);
int rowCounter = 0;
while (!yielder.isDone()) {
rowCounter++;
yielder.next(yielder.get());
}
log.info("Total result row count:" + rowCounter);
}
catch (Throwable ignored) {
}
return QUERIES.get(Integer.parseInt(query));
}
private StringEncodingStrategy getStringEncodingStrategy()
@Override
public List<String> getDatasources()
{
if (stringEncoding.startsWith("front-coded")) {
String[] split = stringEncoding.split("-");
int bucketSize = Integer.parseInt(split[2]);
return new StringEncodingStrategy.FrontCoded(bucketSize, FrontCodedIndexed.V1);
} else {
return new StringEncodingStrategy.Utf8();
}
}
public static Pair<PlannerFactory, SqlEngine> createSqlSystem(
final Map<DataSegment, QueryableIndex> segmentMap,
final Map<String, LookupExtractor> lookupMap,
@Nullable final String storageType,
final Closer closer
)
{
final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(closer);
final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate);
final PlannerConfig plannerConfig = new PlannerConfig();
for (final Map.Entry<DataSegment, QueryableIndex> segmentEntry : segmentMap.entrySet()) {
addSegmentToWalker(walker, segmentEntry.getKey(), segmentEntry.getValue(), storageType);
}
// Child injector that adds additional lookups.
final Injector injector = new StartupInjectorBuilder()
.withEmptyProperties()
.add(
new ExpressionModule(),
new SegmentWranglerModule(),
new LookylooModule(),
new SqlAggregationModule(),
new CalciteTestOperatorModule(),
binder -> {
for (Map.Entry<String, LookupExtractor> entry : lookupMap.entrySet()) {
MapBinder.newMapBinder(binder, String.class, LookupExtractor.class)
.addBinding(entry.getKey())
.toProvider(entry::getValue)
.in(LazySingleton.class);
}
}
)
.build();
final DruidSchemaCatalog rootSchema =
QueryFrameworkUtils.createMockRootSchema(
injector,
conglomerate,
walker,
plannerConfig,
AuthTestUtils.TEST_AUTHORIZER_MAPPER
);
final SqlEngine engine = CalciteTests.createMockSqlEngine(walker, conglomerate);
final PlannerFactory plannerFactory = new PlannerFactory(
rootSchema,
createOperatorTable(injector),
injector.getInstance(ExprMacroTable.class),
plannerConfig,
AuthTestUtils.TEST_AUTHORIZER_MAPPER,
injector.getInstance(Key.get(ObjectMapper.class, Json.class)),
CalciteTests.DRUID_SCHEMA_NAME,
new CalciteRulesManager(ImmutableSet.of()),
new JoinableFactoryWrapper(QueryFrameworkUtils.createDefaultJoinableFactory(injector)),
CatalogResolver.NULL_RESOLVER,
new AuthConfig(),
new DruidHookDispatcher()
);
return Pair.of(plannerFactory, engine);
}
private static void addSegmentToWalker(
final SpecificSegmentsQuerySegmentWalker walker,
final DataSegment descriptor,
final QueryableIndex index,
@Nullable final String storageType
)
{
if (storageType == null || STORAGE_MMAP.equals(storageType)) {
walker.add(descriptor, new QueryableIndexSegment(index, descriptor.getId()));
} else if (STORAGE_FRAME_ROW.equals(storageType)) {
walker.add(
descriptor,
FrameTestUtil.cursorFactoryToFrameSegment(
new QueryableIndexCursorFactory(index),
FrameType.ROW_BASED,
descriptor.getId()
)
);
} else if (STORAGE_FRAME_COLUMNAR.equals(storageType)) {
walker.add(
descriptor,
FrameTestUtil.cursorFactoryToFrameSegment(
new QueryableIndexCursorFactory(index),
FrameType.COLUMNAR,
descriptor.getId()
)
);
} else {
throw new IAE("Invalid storageType[%s]", storageType);
}
}
private static DruidOperatorTable createOperatorTable(final Injector injector)
{
try {
final Set<SqlOperatorConversion> extractionOperators = new HashSet<>();
extractionOperators.add(injector.getInstance(QueryLookupOperatorConversion.class));
final ApproxCountDistinctSqlAggregator countDistinctSqlAggregator =
new ApproxCountDistinctSqlAggregator(new HllSketchApproxCountDistinctSqlAggregator());
final Set<SqlAggregator> aggregators = new HashSet<>(
ImmutableList.of(
new DoublesSketchApproxQuantileSqlAggregator(),
new DoublesSketchObjectSqlAggregator(),
new HllSketchApproxCountDistinctSqlAggregator(),
new HllSketchApproxCountDistinctUtf8SqlAggregator(),
new ThetaSketchApproxCountDistinctSqlAggregator(),
new CountSqlAggregator(countDistinctSqlAggregator),
countDistinctSqlAggregator
)
);
return new DruidOperatorTable(aggregators, extractionOperators);
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
@TearDown(Level.Trial)
public void tearDown() throws Exception
{
closer.close();
}
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void querySql(Blackhole blackhole)
{
final Map<String, Object> context = ImmutableMap.of(
QueryContexts.VECTORIZE_KEY, vectorize,
QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize
);
final String sql = QUERIES.get(Integer.parseInt(query));
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in);
blackhole.consume(lastRow);
}
}
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void planSql(Blackhole blackhole)
{
final Map<String, Object> context = ImmutableMap.of(
QueryContexts.VECTORIZE_KEY, vectorize,
QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize
);
final String sql = QUERIES.get(Integer.parseInt(query));
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) {
final PlannerResult plannerResult = planner.plan();
blackhole.consume(plannerResult);
}
return ImmutableList.of(SqlBenchmarkDatasets.BASIC);
}
}

View File

@@ -0,0 +1,423 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.benchmark.query;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import org.apache.druid.data.input.impl.AggregateProjectionSpec;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.LongDimensionSchema;
import org.apache.druid.data.input.impl.StringDimensionSchema;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory;
import org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory;
import org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.joda.time.Interval;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public class SqlBenchmarkDatasets
{
  private static final Map<String, BenchmarkSchema> DATASET_SCHEMAS = new HashMap<>();
  public static final String BASIC = "basic";
  public static final String EXPRESSIONS = "expressions";
  public static final String NESTED = "nested";
  public static final String DATASKETCHES = "datasketches";
  public static final String PROJECTIONS = "projections";
  public static final String GROUPER = "grouper";
  // Initializes all benchmark dataset schemas that feed the data generators when benchmarks run. Add any additional
  // datasets to this initializer as needed; they become available to every benchmark under the table name registered here.
static {
// the classic 'basic' schema, string dimension oriented with a few metrics
final GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get(GeneratorBasicSchemas.BASIC_SCHEMA);
DATASET_SCHEMAS.put(
BASIC,
new BenchmarkSchema(
Collections.singletonList(makeSegment(BASIC, basicSchema.getDataInterval())),
basicSchema,
TransformSpec.NONE,
makeDimensionsSpec(basicSchema),
basicSchema.getAggsArray(),
Collections.emptyList(),
Granularities.NONE
)
);
// expression testbench schema, lots of different column types
final GeneratorSchemaInfo expressionsSchema = GeneratorBasicSchemas.SCHEMA_MAP.get(
GeneratorBasicSchemas.EXPRESSION_TESTBENCH_SCHEMA
);
DATASET_SCHEMAS.put(
EXPRESSIONS,
new BenchmarkSchema(
Collections.singletonList(makeSegment(EXPRESSIONS, expressionsSchema.getDataInterval())),
expressionsSchema,
TransformSpec.NONE,
makeDimensionsSpec(expressionsSchema),
expressionsSchema.getAggsArray(),
Collections.emptyList(),
Granularities.NONE
)
);
// expressions schema but with transform to create nested column
DATASET_SCHEMAS.put(
NESTED,
new BenchmarkSchema(
Collections.singletonList(makeSegment(NESTED, expressionsSchema.getDataInterval())),
expressionsSchema,
new TransformSpec(
null,
ImmutableList.of(
new ExpressionTransform(
"nested",
"json_object('long1', long1, 'nesteder', json_object('string1', string1, 'long2', long2, 'double3',double3, 'string5', string5))",
TestExprMacroTable.INSTANCE
)
)
),
DimensionsSpec.builder().setDimensions(
ImmutableList.copyOf(
Iterables.concat(
expressionsSchema.getDimensionsSpecExcludeAggs().getDimensions(),
Collections.singletonList(new AutoTypeColumnSchema("nested", null))
)
)
).build(),
expressionsSchema.getAggsArray(),
Collections.emptyList(),
Granularities.NONE
)
);
// expressions schema but with some datasketch aggs defined
GeneratorSchemaInfo datasketchesSchema = new GeneratorSchemaInfo(
expressionsSchema.getColumnSchemas(),
ImmutableList.of(
new HllSketchBuildAggregatorFactory("hll_string5", "string5", null, null, null, false, true),
new SketchMergeAggregatorFactory("theta_string5", "string5", null, null, null, null),
new DoublesSketchAggregatorFactory("quantiles_float4", "float4", null, null, null),
new DoublesSketchAggregatorFactory("quantiles_long3", "long3", null, null, null)
),
expressionsSchema.getDataInterval(),
true
);
DATASET_SCHEMAS.put(
DATASKETCHES,
new BenchmarkSchema(
Collections.singletonList(makeSegment(DATASKETCHES, datasketchesSchema.getDataInterval())),
datasketchesSchema,
TransformSpec.NONE,
makeDimensionsSpec(datasketchesSchema),
datasketchesSchema.getAggsArray(),
Collections.emptyList(),
Granularities.NONE
)
);
// expressions schema with projections
DATASET_SCHEMAS.put(
PROJECTIONS,
new BenchmarkSchema(
Collections.singletonList(makeSegment(PROJECTIONS, expressionsSchema.getDataInterval())),
expressionsSchema,
TransformSpec.NONE,
makeDimensionsSpec(expressionsSchema),
expressionsSchema.getAggsArray(),
Arrays.asList(
new AggregateProjectionSpec(
"string2_hourly_sums_hll",
VirtualColumns.create(
Granularities.toVirtualColumn(Granularities.HOUR, "__gran")
),
Arrays.asList(
new StringDimensionSchema("string2"),
new LongDimensionSchema("__gran")
),
new AggregatorFactory[]{
new LongSumAggregatorFactory("long4_sum", "long4"),
new DoubleSumAggregatorFactory("double2_sum", "double2"),
new HllSketchBuildAggregatorFactory("hll_string5", "string5", null, null, null, false, true)
}
),
new AggregateProjectionSpec(
"string2_long2_sums",
VirtualColumns.EMPTY,
Arrays.asList(
new StringDimensionSchema("string2"),
new LongDimensionSchema("long2")
),
new AggregatorFactory[]{
new LongSumAggregatorFactory("long4_sum", "long4"),
new DoubleSumAggregatorFactory("double2_sum", "double2"),
new HllSketchBuildAggregatorFactory("hll_string5", "string5", null, null, null, false, true)
}
)
),
Granularities.NONE
)
);
// group-by testing, 2 segments
final GeneratorSchemaInfo groupingSchema = GeneratorBasicSchemas.SCHEMA_MAP.get(
GeneratorBasicSchemas.GROUPBY_TESTBENCH_SCHEMA
);
DATASET_SCHEMAS.put(
GROUPER,
new BenchmarkSchema(
Arrays.asList(
makeSegment(GROUPER, groupingSchema.getDataInterval(), 0),
makeSegment(GROUPER, groupingSchema.getDataInterval(), 1)
),
groupingSchema,
new TransformSpec(
null,
ImmutableList.of(
// string array dims
new ExpressionTransform(
"stringArray-Sequential-100_000",
"array(\"string-Sequential-100_000\")",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"stringArray-Sequential-3_000_000",
"array(\"string-Sequential-10_000_000\")",
TestExprMacroTable.INSTANCE
),
/*
new ExpressionTransform(
"stringArray-Sequential-1_000_000_000",
"array(\"string-Sequential-1_000_000_000\")",
TestExprMacroTable.INSTANCE
),*/
new ExpressionTransform(
"stringArray-ZipF-1_000_000",
"array(\"string-ZipF-1_000_000\")",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"stringArray-Uniform-1_000_000",
"array(\"string-Uniform-1_000_000\")",
TestExprMacroTable.INSTANCE
),
// long array dims
new ExpressionTransform(
"longArray-Sequential-100_000",
"array(\"long-Sequential-100_000\")",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"longArray-Sequential-3_000_000",
"array(\"long-Sequential-10_000_000\")",
TestExprMacroTable.INSTANCE
),
/*
new ExpressionTransform(
"longArray-Sequential-1_000_000_000",
"array(\"long-Sequential-1_000_000_000\")",
TestExprMacroTable.INSTANCE
),*/
new ExpressionTransform(
"longArray-ZipF-1_000_000",
"array(\"long-ZipF-1_000_000\")",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"longArray-Uniform-1_000_000",
"array(\"long-Uniform-1_000_000\")",
TestExprMacroTable.INSTANCE
),
// nested complex json dim
new ExpressionTransform(
"nested-Sequential-100_000",
"json_object('long1', \"long-Sequential-100_000\", 'nesteder', json_object('long1', \"long-Sequential-100_000\"))",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"nested-Sequential-3_000_000",
"json_object('long1', \"long-Sequential-10_000_000\", 'nesteder', json_object('long1', \"long-Sequential-10_000_000\"))",
TestExprMacroTable.INSTANCE
),
/*
new ExpressionTransform(
"nested-Sequential-1_000_000_000",
"json_object('long1', \"long-Sequential-1_000_000_000\", 'nesteder', json_object('long1', \"long-Sequential-1_000_000_000\"))",
TestExprMacroTable.INSTANCE
),*/
new ExpressionTransform(
"nested-ZipF-1_000_000",
"json_object('long1', \"long-ZipF-1_000_000\", 'nesteder', json_object('long1', \"long-ZipF-1_000_000\"))",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"nested-Uniform-1_000_000",
"json_object('long1', \"long-Uniform-1_000_000\", 'nesteder', json_object('long1', \"long-Uniform-1_000_000\"))",
TestExprMacroTable.INSTANCE
)
)
),
makeDimensionsSpec(groupingSchema),
groupingSchema.getAggsArray(),
Collections.emptyList(),
Granularities.NONE
)
);
}
public static BenchmarkSchema getSchema(String dataset)
{
return DATASET_SCHEMAS.get(dataset);
}
private static DataSegment makeSegment(String datasource, Interval interval)
{
return makeSegment(datasource, interval, 0);
}
private static DataSegment makeSegment(String datasource, Interval interval, int partitionNumber)
{
return DataSegment.builder()
.dataSource(datasource)
.interval(interval)
.version("1")
.shardSpec(new LinearShardSpec(partitionNumber))
.size(0)
.build();
}
private static DimensionsSpec makeDimensionsSpec(GeneratorSchemaInfo schemaInfo)
{
return DimensionsSpec.builder().setDimensions(schemaInfo.getDimensionsSpecExcludeAggs().getDimensions()).build();
}
public static class BenchmarkSchema
{
private final List<DataSegment> dataSegments;
private final GeneratorSchemaInfo generatorSchemaInfo;
private final TransformSpec transformSpec;
private final DimensionsSpec dimensionsSpec;
private final AggregatorFactory[] aggregators;
private final Granularity queryGranularity;
private final List<AggregateProjectionSpec> projections;
public BenchmarkSchema(
List<DataSegment> dataSegments,
GeneratorSchemaInfo generatorSchemaInfo,
TransformSpec transformSpec,
DimensionsSpec dimensionSpec,
AggregatorFactory[] aggregators,
List<AggregateProjectionSpec> projections,
Granularity queryGranularity
)
{
this.dataSegments = dataSegments;
this.generatorSchemaInfo = generatorSchemaInfo;
this.transformSpec = transformSpec;
this.dimensionsSpec = dimensionSpec;
this.aggregators = aggregators;
this.queryGranularity = queryGranularity;
this.projections = projections;
}
public List<DataSegment> getDataSegments()
{
return dataSegments;
}
public GeneratorSchemaInfo getGeneratorSchemaInfo()
{
return generatorSchemaInfo;
}
public TransformSpec getTransformSpec()
{
return transformSpec;
}
public DimensionsSpec getDimensionsSpec()
{
return dimensionsSpec;
}
public AggregatorFactory[] getAggregators()
{
return aggregators;
}
public Granularity getQueryGranularity()
{
return queryGranularity;
}
public List<AggregateProjectionSpec> getProjections()
{
return projections;
}
public BenchmarkSchema asAutoDimensions()
{
return new SqlBenchmarkDatasets.BenchmarkSchema(
dataSegments,
generatorSchemaInfo,
transformSpec,
dimensionsSpec.withDimensions(
dimensionsSpec.getDimensions()
.stream()
.map(dim -> new AutoTypeColumnSchema(dim.getName(), null))
.collect(Collectors.toList())
),
aggregators,
projections.stream()
.map(projection -> new AggregateProjectionSpec(
projection.getName(),
projection.getVirtualColumns(),
projection.getGroupingColumns()
.stream()
.map(dim -> new AutoTypeColumnSchema(dim.getName(), null))
.collect(Collectors.toList()),
projection.getAggregators()
)).collect(Collectors.toList()),
queryGranularity
);
}
}
}
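
For reference, the pattern for consuming one of these registered datasets is small: a benchmark extends SqlBaseQueryBenchmark, returns the dataset name from getDatasources(), and references the table as druid.<name> in its SQL. The sketch below is illustrative only and not part of this commit; the class name and queries are hypothetical, and the dimSequential column assumes the generator's 'basic' schema.

// Hypothetical sketch, not part of this commit: a minimal benchmark over the registered 'basic' dataset.
package org.apache.druid.benchmark.query;
import com.google.common.collect.ImmutableList;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.util.List;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
public class SqlBasicDatasetExampleBenchmark extends SqlBaseQueryBenchmark
{
  private static final List<String> QUERIES = ImmutableList.of(
      "SELECT COUNT(*) FROM druid.basic",
      // dimSequential is assumed to exist in the generator 'basic' schema
      "SELECT dimSequential, COUNT(*) FROM druid.basic GROUP BY 1 ORDER BY 2 DESC"
  );
  @Param({"0", "1"})
  private String query;
  @Override
  public String getQuery()
  {
    return QUERIES.get(Integer.parseInt(query));
  }
  @Override
  public List<String> getDatasources()
  {
    return ImmutableList.of(SqlBenchmarkDatasets.BASIC);
  }
}

The druid.basic table name works because makeSegment registers each datasource under the same string used as the dataset key.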


@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.benchmark.query;
import com.google.common.collect.ImmutableList;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.util.List;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
public class SqlComplexMetricsColumnsBenchmark extends SqlBaseQueryBenchmark
{
private static final List<String> QUERIES = ImmutableList.of(
"SELECT APPROX_COUNT_DISTINCT_DS_HLL(hll_string5) FROM druid.datasketches",
"SELECT APPROX_COUNT_DISTINCT_DS_THETA(theta_string5) FROM druid.datasketches",
"SELECT DS_GET_QUANTILE(DS_QUANTILES_SKETCH(quantiles_float4), 0.5) FROM druid.datasketches",
"SELECT DS_GET_QUANTILE(DS_QUANTILES_SKETCH(quantiles_long3), 0.9) FROM druid.datasketches",
"SELECT string2, APPROX_COUNT_DISTINCT_DS_HLL(hll_string5) FROM druid.datasketches GROUP BY 1 ORDER BY 2 DESC",
"SELECT string2, APPROX_COUNT_DISTINCT_DS_THETA(theta_string5, 4096) FROM druid.datasketches GROUP BY 1 ORDER BY 2 DESC",
"SELECT string2, DS_GET_QUANTILE(DS_QUANTILES_SKETCH(quantiles_float4), 0.5) FROM druid.datasketches GROUP BY 1 ORDER BY 2 DESC",
"SELECT string2, DS_GET_QUANTILE(DS_QUANTILES_SKETCH(quantiles_long3), 0.9) FROM druid.datasketches GROUP BY 1 ORDER BY 2 DESC"
);
@Param({
"0",
"1",
"2",
"3",
"4",
"5",
"6",
"7"
})
private String query;
@Override
public String getQuery()
{
return QUERIES.get(Integer.parseInt(query));
}
@Override
public List<String> getDatasources()
{
return ImmutableList.of(SqlBenchmarkDatasets.DATASKETCHES);
}
}
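
To run one of these parameterized benchmarks in isolation, the standard JMH Runner API applies; the launcher below is a generic sketch rather than part of this commit, and the "query" parameter name simply echoes the @Param field above.

// Hypothetical launcher sketch using the standard JMH Runner API; not part of this commit.
package org.apache.druid.benchmark.query;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
public class SqlBenchmarkLauncher
{
  public static void main(String[] args) throws RunnerException
  {
    final Options options = new OptionsBuilder()
        .include(SqlComplexMetricsColumnsBenchmark.class.getSimpleName())
        // run only the first query variant; other @Param values keep their defaults
        .param("query", "0")
        .forks(1)
        .build();
    new Runner(options).run();
  }
}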


@ -19,69 +19,19 @@
package org.apache.druid.benchmark.query;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Yielder;
import org.apache.druid.java.util.common.guava.Yielders;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.DruidProcessingConfig;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest;
import org.apache.druid.sql.calcite.planner.CalciteRulesManager;
import org.apache.druid.sql.calcite.planner.CatalogResolver;
import org.apache.druid.sql.calcite.planner.DruidPlanner;
import org.apache.druid.sql.calcite.planner.PlannerConfig;
import org.apache.druid.sql.calcite.planner.PlannerFactory;
import org.apache.druid.sql.calcite.planner.PlannerResult;
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.hook.DruidHookDispatcher;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import javax.annotation.Nullable;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
/**
* Benchmark that tests various SQL queries.
@ -90,157 +40,107 @@ import java.util.stream.Collectors;
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
public class SqlExpressionBenchmark
public class SqlExpressionBenchmark extends SqlBaseQueryBenchmark
{
private static final Logger log = new Logger(SqlExpressionBenchmark.class);
static {
NullHandling.initializeForTests();
ExpressionProcessing.initializeForTests();
}
private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig()
{
@Override
public int intermediateComputeSizeBytes()
{
return 512 * 1024 * 1024;
}
@Override
public int getNumMergeBuffers()
{
return 3;
}
@Override
public int getNumThreads()
{
return 1;
}
@Override
public String getFormatString()
{
return "benchmarks-processing-%s";
}
};
private static final List<String> QUERIES = ImmutableList.of(
// ===========================
// non-expression reference queries
// ===========================
// 0: non-expression timeseries reference, 1 column
"SELECT SUM(long1) FROM foo",
"SELECT SUM(long1) FROM expressions",
// 1: non-expression timeseries reference, 2 columns
"SELECT SUM(long1), SUM(long2) FROM foo",
"SELECT SUM(long1), SUM(long2) FROM expressions",
// 2: non-expression timeseries reference, 3 columns
"SELECT SUM(long1), SUM(long4), SUM(double1) FROM foo",
"SELECT SUM(long1), SUM(long4), SUM(double1) FROM expressions",
// 3: non-expression timeseries reference, 4 columns
"SELECT SUM(long1), SUM(long4), SUM(double1), SUM(float3) FROM foo",
"SELECT SUM(long1), SUM(long4), SUM(double1), SUM(float3) FROM expressions",
// 4: non-expression timeseries reference, 5 columns
"SELECT SUM(long1), SUM(long4), SUM(double1), SUM(float3), SUM(long5) FROM foo",
"SELECT SUM(long1), SUM(long4), SUM(double1), SUM(float3), SUM(long5) FROM expressions",
// 5: group by non-expr with 1 agg
"SELECT string2, SUM(long1) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT string2, SUM(long1) FROM expressions GROUP BY 1 ORDER BY 2",
// 6: group by non-expr with 2 agg
"SELECT string2, SUM(long1), SUM(double3) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT string2, SUM(long1), SUM(double3) FROM expressions GROUP BY 1 ORDER BY 2",
// ===========================
// expressions
// ===========================
// 7: math op - 2 longs
"SELECT SUM(long1 * long2) FROM foo",
"SELECT SUM(long1 * long2) FROM expressions",
// 8: mixed math - 2 longs, 1 double
"SELECT SUM((long1 * long2) / double1) FROM foo",
"SELECT SUM((long1 * long2) / double1) FROM expressions",
// 9: mixed math - 2 longs, 1 double, 1 float
"SELECT SUM(float3 + ((long1 * long4)/double1)) FROM foo",
"SELECT SUM(float3 + ((long1 * long4)/double1)) FROM expressions",
// 10: mixed math - 3 longs, 1 double, 1 float
"SELECT SUM(long5 - (float3 + ((long1 * long4)/double1))) FROM foo",
"SELECT SUM(long5 - (float3 + ((long1 * long4)/double1))) FROM expressions",
// 11: all same math op - 3 longs, 1 double, 1 float
"SELECT SUM(long5 * float3 * long1 * long4 * double1) FROM foo",
"SELECT SUM(long5 * float3 * long1 * long4 * double1) FROM expressions",
// 12: cos
"SELECT cos(double2) FROM foo",
"SELECT cos(double2) FROM expressions",
// 13: unary negate
"SELECT SUM(-long4) FROM foo",
"SELECT SUM(-long4) FROM expressions",
// 14: string long
"SELECT SUM(PARSE_LONG(string1)) FROM foo",
"SELECT SUM(PARSE_LONG(string1)) FROM expressions",
// 15: string longer
"SELECT SUM(PARSE_LONG(string3)) FROM foo",
"SELECT SUM(PARSE_LONG(string3)) FROM expressions",
// 16: time floor, non-expr col + reg agg
"SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(double4) FROM foo GROUP BY 1,2 ORDER BY 3",
"SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(double4) FROM expressions GROUP BY 1,2 ORDER BY 3",
// 17: time floor, non-expr col + expr agg
"SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
"SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(long1 * double4) FROM expressions GROUP BY 1,2 ORDER BY 3",
// 18: time floor + non-expr agg (timeseries) (non-expression reference)
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1) FROM foo GROUP BY 1 ORDER BY 1",
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1) FROM expressions GROUP BY 1 ORDER BY 1",
// 19: time floor + expr agg (timeseries)
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1",
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM expressions GROUP BY 1 ORDER BY 1",
// 20: time floor + non-expr agg (group by)
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1) FROM expressions GROUP BY 1 ORDER BY 2",
// 21: time floor + expr agg (group by)
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM expressions GROUP BY 1 ORDER BY 2",
// 22: time floor offset by 1 day + non-expr agg (group by)
"SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1) FROM foo GROUP BY 1 ORDER BY 1",
"SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1) FROM expressions GROUP BY 1 ORDER BY 1",
// 23: time floor offset by 1 day + expr agg (group by)
"SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1",
"SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1 * long4) FROM expressions GROUP BY 1 ORDER BY 1",
// 24: group by long expr with non-expr agg
"SELECT (long1 * long2), SUM(double1) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT (long1 * long2), SUM(double1) FROM expressions GROUP BY 1 ORDER BY 2",
// 25: group by non-expr with expr agg
"SELECT string2, SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT string2, SUM(long1 * long4) FROM expressions GROUP BY 1 ORDER BY 2",
// 26: group by string expr with non-expr agg
"SELECT CONCAT(string2, '-', long2), SUM(double1) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT CONCAT(string2, '-', long2), SUM(double1) FROM expressions GROUP BY 1 ORDER BY 2",
// 27: group by string expr with expr agg
"SELECT CONCAT(string2, '-', long2), SUM(long1 * double4) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT CONCAT(string2, '-', long2), SUM(long1 * double4) FROM expressions GROUP BY 1 ORDER BY 2",
// 28: group by single input string low cardinality expr with expr agg
"SELECT CONCAT(string2, '-', 'foo'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT CONCAT(string2, '-', 'expressions'), SUM(long1 * long4) FROM expressions GROUP BY 1 ORDER BY 2",
// 29: group by single input string high cardinality expr with expr agg
"SELECT CONCAT(string3, '-', 'foo'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT CONCAT(string3, '-', 'expressions'), SUM(long1 * long4) FROM expressions GROUP BY 1 ORDER BY 2",
// 30: logical and operator
"SELECT CAST(long1 as BOOLEAN) AND CAST (long2 as BOOLEAN), COUNT(*) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT CAST(long1 as BOOLEAN) AND CAST (long2 as BOOLEAN), COUNT(*) FROM expressions GROUP BY 1 ORDER BY 2",
// 31: isnull, notnull
"SELECT long5 IS NULL, long3 IS NOT NULL, count(*) FROM foo GROUP BY 1,2 ORDER BY 3",
"SELECT long5 IS NULL, long3 IS NOT NULL, count(*) FROM expressions GROUP BY 1,2 ORDER BY 3",
// 32: time shift, non-expr col + reg agg, regular
"SELECT TIME_SHIFT(__time, 'PT1H', 3), string2, SUM(double4) FROM foo GROUP BY 1,2 ORDER BY 3",
"SELECT TIME_SHIFT(__time, 'PT1H', 3), string2, SUM(double4) FROM expressions GROUP BY 1,2 ORDER BY 3",
// 33: time shift, non-expr col + expr agg, sequential low cardinality
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long1), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long1), 'PT1H', 1), string2, SUM(long1 * double4) FROM expressions GROUP BY 1,2 ORDER BY 3",
// 34: time shift + non-expr agg (timeseries) (non-expression reference), zipf distribution low cardinality
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long2), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long2), 'PT1H', 1), string2, SUM(long1 * double4) FROM expressions GROUP BY 1,2 ORDER BY 3",
// 35: time shift + expr agg (timeseries), zipf distribution high cardinality
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long3), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long3), 'PT1H', 1), string2, SUM(long1 * double4) FROM expressions GROUP BY 1,2 ORDER BY 3",
// 36: time shift + non-expr agg (group by), uniform distribution low cardinality
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long4), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long4), 'PT1H', 1), string2, SUM(long1 * double4) FROM expressions GROUP BY 1,2 ORDER BY 3",
// 37: time shift + expr agg (group by), uniform distribution high cardinality
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long5), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long5), 'PT1H', 1), string2, SUM(long1 * double4) FROM expressions GROUP BY 1,2 ORDER BY 3",
// 38,39: array element filtering
"SELECT string1, long1 FROM foo WHERE ARRAY_CONTAINS(\"multi-string3\", 100) GROUP BY 1,2",
"SELECT string1, long1 FROM foo WHERE ARRAY_OVERLAP(\"multi-string3\", ARRAY[100, 200]) GROUP BY 1,2",
"SELECT string1, long1 FROM expressions WHERE ARRAY_CONTAINS(\"multi-string3\", 100) GROUP BY 1,2",
"SELECT string1, long1 FROM expressions WHERE ARRAY_OVERLAP(\"multi-string3\", ARRAY[100, 200]) GROUP BY 1,2",
// 40: regex filtering
"SELECT string4, COUNT(*) FROM foo WHERE REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL GROUP BY 1",
"SELECT string4, COUNT(*) FROM expressions WHERE REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL GROUP BY 1",
// 41: complicated filtering
"SELECT string2, SUM(long1) FROM foo WHERE string1 = '1000' AND string5 LIKE '%1%' AND (string3 in ('1', '10', '20', '22', '32') AND long2 IN (1, 19, 21, 23, 25, 26, 46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL)) GROUP BY 1 ORDER BY 2",
"SELECT string2, SUM(long1) FROM expressions WHERE string1 = '1000' AND string5 LIKE '%1%' AND (string3 in ('1', '10', '20', '22', '32') AND long2 IN (1, 19, 21, 23, 25, 26, 46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL)) GROUP BY 1 ORDER BY 2",
// 42: array_contains expr
"SELECT ARRAY_CONTAINS(\"multi-string3\", 100) FROM foo",
"SELECT ARRAY_CONTAINS(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo",
"SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 100]) FROM foo",
"SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo",
"SELECT ARRAY_CONTAINS(\"multi-string3\", 100) FROM expressions",
"SELECT ARRAY_CONTAINS(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM expressions",
"SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 100]) FROM expressions",
"SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM expressions",
// 46: filters with random orders
"SELECT string2, SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND string1 = '1000' GROUP BY 1 ORDER BY 2",
"SELECT string2, SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND (string3 in ('1', '10', '20', '22', '32') AND long2 IN (1, 19, 21, 23, 25, 26, 46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL)) AND string1 = '1000' GROUP BY 1 ORDER BY 2"
);
@Param({"5000000"})
private int rowsPerSegment;
@Param({
"false",
"force"
})
private String vectorize;
@Param({
"explicit",
"auto"
})
private String schema;
"SELECT string2, SUM(long1) FROM expressions WHERE string5 LIKE '%1%' AND string1 = '1000' GROUP BY 1 ORDER BY 2",
"SELECT string2, SUM(long1) FROM expressions WHERE string5 LIKE '%1%' AND (string3 in ('1', '10', '20', '22', '32') AND long2 IN (1, 19, 21, 23, 25, 26, 46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL)) AND string1 = '1000' GROUP BY 1 ORDER BY 2"
);
@Param({
"singleString",
@ -304,154 +204,26 @@ public class SqlExpressionBenchmark
})
private String query;
private SqlEngine engine;
@Nullable
private PlannerFactory plannerFactory;
private Closer closer = Closer.create();
@Setup(Level.Trial)
public void setup()
@Override
public String getQuery()
{
final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench");
final DataSegment dataSegment = DataSegment.builder()
.dataSource("foo")
.interval(schemaInfo.getDataInterval())
.version("1")
.shardSpec(new LinearShardSpec(0))
.size(0)
.build();
final PlannerConfig plannerConfig = new PlannerConfig();
final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
log.info(
"Starting benchmark setup using cacheDir[%s], rows[%,d], schema[%s].",
segmentGenerator.getCacheDir(),
rowsPerSegment,
schema
);
final QueryableIndex index;
if ("auto".equals(schema)) {
List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
.getDimensions()
.stream()
.map(x -> new AutoTypeColumnSchema(x.getName(), null))
.collect(Collectors.toList());
index = segmentGenerator.generate(
dataSegment,
schemaInfo,
DimensionsSpec.builder().setDimensions(columnSchemas).build(),
TransformSpec.NONE,
IndexSpec.DEFAULT,
Granularities.NONE,
rowsPerSegment
);
} else {
index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment);
}
final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(
closer,
PROCESSING_CONFIG
);
final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate).add(
dataSegment,
index
);
closer.register(walker);
final ObjectMapper jsonMapper = CalciteTests.getJsonMapper();
final DruidSchemaCatalog rootSchema =
CalciteTests.createMockRootSchema(conglomerate, walker, plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER);
engine = CalciteTests.createMockSqlEngine(walker, conglomerate);
plannerFactory = new PlannerFactory(
rootSchema,
CalciteTests.createOperatorTable(),
CalciteTests.createExprMacroTable(),
plannerConfig,
AuthTestUtils.TEST_AUTHORIZER_MAPPER,
jsonMapper,
CalciteTests.DRUID_SCHEMA_NAME,
new CalciteRulesManager(ImmutableSet.of()),
CalciteTests.createJoinableFactoryWrapper(),
CatalogResolver.NULL_RESOLVER,
new AuthConfig(),
new DruidHookDispatcher()
);
try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
engine,
plannerFactory,
QUERIES.get(Integer.parseInt(query))
);
log.info("non-vectorized and vectorized results match");
}
catch (Throwable ex) {
log.warn(ex, "non-vectorized and vectorized results do not match");
}
final String sql = QUERIES.get(Integer.parseInt(query));
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(
engine,
"EXPLAIN PLAN FOR " + sql,
ImmutableMap.of(
"useNativeQueryExplain",
true
)
)) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Object[] planResult = resultSequence.toList().get(0);
log.info("Native query plan:\n" +
jsonMapper.writerWithDefaultPrettyPrinter()
.writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
);
}
catch (JsonProcessingException ex) {
log.warn(ex, "explain failed");
}
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Yielder<Object[]> yielder = Yielders.each(resultSequence);
int rowCounter = 0;
while (!yielder.isDone()) {
rowCounter++;
yielder.next(yielder.get());
}
log.info("Total result row count:" + rowCounter);
}
catch (Throwable ex) {
log.warn(ex, "failed to count rows");
}
return QUERIES.get(Integer.parseInt(query));
}
@TearDown(Level.Trial)
public void tearDown() throws Exception
@Override
public List<String> getDatasources()
{
closer.close();
return ImmutableList.of(SqlBenchmarkDatasets.EXPRESSIONS);
}
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void querySql(Blackhole blackhole)
@Override
protected Map<String, Object> getContext()
{
final Map<String, Object> context = ImmutableMap.of(
QueryContexts.VECTORIZE_KEY, vectorize,
QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize,
GroupByQueryConfig.CTX_KEY_DEFER_EXPRESSION_DIMENSIONS, deferExpressionDimensions
);
final String sql = QUERIES.get(Integer.parseInt(query));
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in);
blackhole.consume(lastRow);
}
return context;
}
}
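
The getContext() hook above is how a migrated benchmark injects per-query context into the shared base class. A subclass that wants to pin vectorization rather than parameterize it could override the same hook; this is a hypothetical fragment, assuming the class already imports Map, ImmutableMap, and QueryContexts as the file above does.

  // Hypothetical sketch: pin the vectorize context instead of exposing it as a @Param.
  @Override
  protected Map<String, Object> getContext()
  {
    return ImmutableMap.of(
        QueryContexts.VECTORIZE_KEY, "force",
        QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, "force"
    );
  }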


@ -19,109 +19,23 @@
package org.apache.druid.benchmark.query;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.guice.BuiltInTypesModule;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.DruidProcessingConfig;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.column.StringEncodingStrategy;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest;
import org.apache.druid.sql.calcite.planner.CalciteRulesManager;
import org.apache.druid.sql.calcite.planner.CatalogResolver;
import org.apache.druid.sql.calcite.planner.DruidPlanner;
import org.apache.druid.sql.calcite.planner.PlannerConfig;
import org.apache.druid.sql.calcite.planner.PlannerFactory;
import org.apache.druid.sql.calcite.planner.PlannerResult;
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.hook.DruidHookDispatcher;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
public class SqlGroupByBenchmark
public class SqlGroupByBenchmark extends SqlBaseQueryBenchmark
{
static {
NullHandling.initializeForTests();
ExpressionProcessing.initializeForTests();
BuiltInTypesModule.registerHandlersAndSerde();
}
private static final Logger log = new Logger(SqlGroupByBenchmark.class);
private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig()
{
@Override
public int intermediateComputeSizeBytes()
{
return 512 * 1024 * 1024;
}
@Override
public int getNumMergeBuffers()
{
return 3;
}
@Override
public int getNumThreads()
{
return 1;
}
@Override
public String getFormatString()
{
return "benchmarks-processing-%s";
}
};
@Param({
"string-Sequential-100_000",
"string-Sequential-10_000_000",
@ -167,226 +81,16 @@ public class SqlGroupByBenchmark
})
private String groupingDimension;
private SqlEngine engine;
@Nullable
private PlannerFactory plannerFactory;
private Closer closer = Closer.create();
@Setup(Level.Trial)
public void setup()
@Override
public String getQuery()
{
final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("groupBy-testbench");
final DataSegment dataSegment = DataSegment.builder()
.dataSource("foo")
.interval(schemaInfo.getDataInterval())
.version("1")
.shardSpec(new LinearShardSpec(0))
.size(0)
.build();
final DataSegment dataSegment2 = DataSegment.builder()
.dataSource("foo")
.interval(schemaInfo.getDataInterval())
.version("1")
.shardSpec(new LinearShardSpec(1))
.size(0)
.build();
final PlannerConfig plannerConfig = new PlannerConfig();
String columnCardinalityWithUnderscores = groupingDimension.substring(groupingDimension.lastIndexOf('-') + 1);
int rowsPerSegment = Integer.parseInt(StringUtils.replace(columnCardinalityWithUnderscores, "_", ""));
final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
TransformSpec transformSpec = new TransformSpec(
null,
ImmutableList.of(
// string array dims
new ExpressionTransform(
"stringArray-Sequential-100_000",
"array(\"string-Sequential-100_000\")",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"stringArray-Sequential-3_000_000",
"array(\"string-Sequential-10_000_000\")",
TestExprMacroTable.INSTANCE
),
/*
new ExpressionTransform(
"stringArray-Sequential-1_000_000_000",
"array(\"string-Sequential-1_000_000_000\")",
TestExprMacroTable.INSTANCE
),*/
new ExpressionTransform(
"stringArray-ZipF-1_000_000",
"array(\"string-ZipF-1_000_000\")",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"stringArray-Uniform-1_000_000",
"array(\"string-Uniform-1_000_000\")",
TestExprMacroTable.INSTANCE
),
// long array dims
new ExpressionTransform(
"longArray-Sequential-100_000",
"array(\"long-Sequential-100_000\")",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"longArray-Sequential-3_000_000",
"array(\"long-Sequential-10_000_000\")",
TestExprMacroTable.INSTANCE
),
/*
new ExpressionTransform(
"longArray-Sequential-1_000_000_000",
"array(\"long-Sequential-1_000_000_000\")",
TestExprMacroTable.INSTANCE
),*/
new ExpressionTransform(
"longArray-ZipF-1_000_000",
"array(\"long-ZipF-1_000_000\")",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"longArray-Uniform-1_000_000",
"array(\"long-Uniform-1_000_000\")",
TestExprMacroTable.INSTANCE
),
// nested complex json dim
new ExpressionTransform(
"nested-Sequential-100_000",
"json_object('long1', \"long-Sequential-100_000\", 'nesteder', json_object('long1', \"long-Sequential-100_000\"))",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"nested-Sequential-3_000_000",
"json_object('long1', \"long-Sequential-10_000_000\", 'nesteder', json_object('long1', \"long-Sequential-10_000_000\"))",
TestExprMacroTable.INSTANCE
),
/*new ExpressionTransform(
"nested-Sequential-1_000_000_000",
"json_object('long1', \"long-Sequential-1_000_000_000\", 'nesteder', json_object('long1', \"long-Sequential-1_000_000_000\"))",
TestExprMacroTable.INSTANCE
),*/
new ExpressionTransform(
"nested-ZipF-1_000_000",
"json_object('long1', \"long-ZipF-1_000_000\", 'nesteder', json_object('long1', \"long-ZipF-1_000_000\"))",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"nested-Uniform-1_000_000",
"json_object('long1', \"long-Uniform-1_000_000\", 'nesteder', json_object('long1', \"long-Uniform-1_000_000\"))",
TestExprMacroTable.INSTANCE
)
)
);
List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
.getDimensions()
.stream()
.map(x -> new AutoTypeColumnSchema(x.getName(), null))
.collect(Collectors.toList());
List<DimensionSchema> transformSchemas = transformSpec
.getTransforms()
.stream()
.map(
transform -> new AutoTypeColumnSchema(transform.getName(), null)
)
.collect(Collectors.toList());
final QueryableIndex index = segmentGenerator.generate(
dataSegment,
schemaInfo,
DimensionsSpec.builder()
.setDimensions(ImmutableList.<DimensionSchema>builder()
.addAll(columnSchemas)
.addAll(transformSchemas)
.build()
)
.build(),
transformSpec,
IndexSpec.builder().withStringDictionaryEncoding(new StringEncodingStrategy.Utf8()).build(),
Granularities.NONE,
rowsPerSegment
);
final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(
closer,
PROCESSING_CONFIG
);
final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate)
.add(dataSegment, index)
.add(dataSegment2, index);
closer.register(walker);
// Hacky and pollutes global namespace, but it is fine since benchmarks are run in isolation. Wasn't able
// to work up a cleaner way of doing it by modifying the injector.
CalciteTests.getJsonMapper().registerModules(BuiltInTypesModule.getJacksonModulesList());
final DruidSchemaCatalog rootSchema =
CalciteTests.createMockRootSchema(conglomerate, walker, plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER);
engine = CalciteTests.createMockSqlEngine(walker, conglomerate);
plannerFactory = new PlannerFactory(
rootSchema,
CalciteTests.createOperatorTable(),
CalciteTests.createExprMacroTable(),
plannerConfig,
AuthTestUtils.TEST_AUTHORIZER_MAPPER,
CalciteTests.getJsonMapper(),
CalciteTests.DRUID_SCHEMA_NAME,
new CalciteRulesManager(ImmutableSet.of()),
CalciteTests.createJoinableFactoryWrapper(),
CatalogResolver.NULL_RESOLVER,
new AuthConfig(),
new DruidHookDispatcher()
);
try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
engine,
plannerFactory,
sqlQuery(groupingDimension)
);
log.info("non-vectorized and vectorized results match");
}
catch (Throwable ex) {
log.warn(ex, "non-vectorized and vectorized results do not match");
}
return StringUtils.format("SELECT \"%s\", COUNT(*) FROM druid.%s GROUP BY 1", groupingDimension, SqlBenchmarkDatasets.GROUPER);
}
@TearDown(Level.Trial)
public void tearDown() throws Exception
@Override
public List<String> getDatasources()
{
closer.close();
}
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void querySql(Blackhole blackhole)
{
final String sql = sqlQuery(groupingDimension);
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, Collections.emptyMap())) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in);
blackhole.consume(lastRow);
}
}
private static String sqlQuery(String groupingDimension)
{
return StringUtils.format("SELECT \"%s\", COUNT(*) FROM foo GROUP BY 1", groupingDimension);
return Collections.singletonList(SqlBenchmarkDatasets.GROUPER);
}
}
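
Because getQuery() builds its SQL with StringUtils.format, each groupingDimension parameter expands to exactly one GROUP BY statement against the shared grouper table. An illustrative expansion for one parameter value:

// Illustration only: what getQuery() returns for a single parameter value.
String groupingDimension = "string-Sequential-100_000";
String sql = StringUtils.format(
    "SELECT \"%s\", COUNT(*) FROM druid.%s GROUP BY 1",
    groupingDimension,
    SqlBenchmarkDatasets.GROUPER
);
// => SELECT "string-Sequential-100_000", COUNT(*) FROM druid.grouper GROUP BY 1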


@ -19,224 +19,109 @@
package org.apache.druid.benchmark.query;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Yielder;
import org.apache.druid.java.util.common.guava.Yielders;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.DruidProcessingConfig;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.column.StringEncodingStrategy;
import org.apache.druid.segment.data.FrontCodedIndexed;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest;
import org.apache.druid.sql.calcite.planner.CalciteRulesManager;
import org.apache.druid.sql.calcite.planner.CatalogResolver;
import org.apache.druid.sql.calcite.planner.DruidPlanner;
import org.apache.druid.sql.calcite.planner.PlannerConfig;
import org.apache.druid.sql.calcite.planner.PlannerFactory;
import org.apache.druid.sql.calcite.planner.PlannerResult;
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.hook.DruidHookDispatcher;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
public class SqlNestedDataBenchmark
public class SqlNestedDataBenchmark extends SqlBaseQueryBenchmark
{
private static final Logger log = new Logger(SqlNestedDataBenchmark.class);
static {
NullHandling.initializeForTests();
ExpressionProcessing.initializeForTests();
}
private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig()
{
@Override
public int intermediateComputeSizeBytes()
{
return 512 * 1024 * 1024;
}
@Override
public int getNumMergeBuffers()
{
return 3;
}
@Override
public int getNumThreads()
{
return 1;
}
@Override
public String getFormatString()
{
return "benchmarks-processing-%s";
}
};
private static final List<String> QUERIES = ImmutableList.of(
// ===========================
// non-nested reference queries
// ===========================
// 0,1: timeseries, 1 columns
"SELECT SUM(long1) FROM foo",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo",
"SELECT SUM(long1) FROM druid.nested",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested",
// 2,3: timeseries, 2 columns
"SELECT SUM(long1), SUM(long2) FROM foo",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT)) FROM foo",
"SELECT SUM(long1), SUM(long2) FROM druid.nested",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT)) FROM druid.nested",
// 4,5: timeseries, 3 columns
"SELECT SUM(long1), SUM(long2), SUM(double3) FROM foo",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM foo",
"SELECT SUM(long1), SUM(long2), SUM(double3) FROM druid.nested",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM druid.nested",
// 6,7: group by string with 1 agg
"SELECT string1, SUM(long1) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT string1, SUM(long1) FROM druid.nested GROUP BY 1 ORDER BY 2",
"SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested GROUP BY 1 ORDER BY 2",
// 8,9: group by string with 2 agg
"SELECT string1, SUM(long1), SUM(double3) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT string1, SUM(long1), SUM(double3) FROM druid.nested GROUP BY 1 ORDER BY 2",
"SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM druid.nested GROUP BY 1 ORDER BY 2",
// 10,11: time-series filter string
"SELECT SUM(long1) FROM foo WHERE string1 = '10000' OR string1 = '1000'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string1') = '10000' OR JSON_VALUE(nested, '$.nesteder.string1') = '1000'",
"SELECT SUM(long1) FROM druid.nested WHERE string1 = '10000' OR string1 = '1000'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string1') = '10000' OR JSON_VALUE(nested, '$.nesteder.string1') = '1000'",
// 12,13: time-series filter long
"SELECT SUM(long1) FROM foo WHERE long2 = 10000 OR long2 = 1000",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) = 10000 OR JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) = 1000",
"SELECT SUM(long1) FROM druid.nested WHERE long2 = 10000 OR long2 = 1000",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) = 10000 OR JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) = 1000",
// 14,15: time-series filter double
"SELECT SUM(long1) FROM foo WHERE double3 < 10000.0 AND double3 > 1000.0",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 10000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
"SELECT SUM(long1) FROM druid.nested WHERE double3 < 10000.0 AND double3 > 1000.0",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 10000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
// 16,17: group by long filter by string
"SELECT long1, SUM(double3) FROM foo WHERE string1 = '10000' OR string1 = '1000' GROUP BY 1 ORDER BY 2",
"SELECT JSON_VALUE(nested, '$.long1' RETURNING BIGINT), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string1') = '10000' OR JSON_VALUE(nested, '$.nesteder.string1') = '1000' GROUP BY 1 ORDER BY 2",
"SELECT long1, SUM(double3) FROM druid.nested WHERE string1 = '10000' OR string1 = '1000' GROUP BY 1 ORDER BY 2",
"SELECT JSON_VALUE(nested, '$.long1' RETURNING BIGINT), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string1') = '10000' OR JSON_VALUE(nested, '$.nesteder.string1') = '1000' GROUP BY 1 ORDER BY 2",
// 18,19: group by string filter by long
"SELECT string1, SUM(double3) FROM foo WHERE long2 < 10000 AND long2 > 1000 GROUP BY 1 ORDER BY 2",
"SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) < 10000 AND JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) > 1000 GROUP BY 1 ORDER BY 2",
"SELECT string1, SUM(double3) FROM druid.nested WHERE long2 < 10000 AND long2 > 1000 GROUP BY 1 ORDER BY 2",
"SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) < 10000 AND JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) > 1000 GROUP BY 1 ORDER BY 2",
// 20,21: group by string filter by double
"SELECT string1, SUM(double3) FROM foo WHERE double3 < 10000.0 AND double3 > 1000.0 GROUP BY 1 ORDER BY 2",
"SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 10000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0 GROUP BY 1 ORDER BY 2",
"SELECT string1, SUM(double3) FROM druid.nested WHERE double3 < 10000.0 AND double3 > 1000.0 GROUP BY 1 ORDER BY 2",
"SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 10000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0 GROUP BY 1 ORDER BY 2",
// 22, 23:
"SELECT long2 FROM foo WHERE long2 IN (1, 19, 21, 23, 25, 26, 46)",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46)",
"SELECT long2 FROM druid.nested WHERE long2 IN (1, 19, 21, 23, 25, 26, 46)",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46)",
// 24, 25
"SELECT long2 FROM foo WHERE long2 IN (1, 19, 21, 23, 25, 26, 46) GROUP BY 1",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46) GROUP BY 1",
"SELECT long2 FROM druid.nested WHERE long2 IN (1, 19, 21, 23, 25, 26, 46) GROUP BY 1",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46) GROUP BY 1",
// 26, 27
"SELECT SUM(long1) FROM foo WHERE double3 < 1005.0 AND double3 > 1000.0",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 1005.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
"SELECT SUM(long1) FROM druid.nested WHERE double3 < 1005.0 AND double3 > 1000.0",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 1005.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
// 28, 29: sum of long filtered by narrow double range
"SELECT SUM(long1) FROM foo WHERE double3 < 2000.0 AND double3 > 1000.0",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 2000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
"SELECT SUM(long1) FROM druid.nested WHERE double3 < 2000.0 AND double3 > 1000.0",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 2000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
// 30, 31: sum of long filtered by medium double range
"SELECT SUM(long1) FROM foo WHERE double3 < 3000.0 AND double3 > 1000.0",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 3000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
"SELECT SUM(long1) FROM druid.nested WHERE double3 < 3000.0 AND double3 > 1000.0",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 3000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
// 32, 33: sum of long filtered by wide double range
"SELECT SUM(long1) FROM foo WHERE double3 < 5000.0 AND double3 > 1000.0",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 5000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
"SELECT SUM(long1) FROM druid.nested WHERE double3 < 5000.0 AND double3 > 1000.0",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 5000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
// 34,35 smaller cardinality like range filter
"SELECT SUM(long1) FROM foo WHERE string1 LIKE '1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string1') LIKE '1%'",
"SELECT SUM(long1) FROM druid.nested WHERE string1 LIKE '1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string1') LIKE '1%'",
// 36,37 smaller cardinality like predicate filter
"SELECT SUM(long1) FROM foo WHERE string1 LIKE '%1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string1') LIKE '%1%'",
"SELECT SUM(long1) FROM druid.nested WHERE string1 LIKE '%1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string1') LIKE '%1%'",
// 38-39 moderate cardinality like range
"SELECT SUM(long1) FROM foo WHERE string5 LIKE '1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '1%'",
"SELECT SUM(long1) FROM druid.nested WHERE string5 LIKE '1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '1%'",
// 40, 41 big cardinality lex range
"SELECT SUM(long1) FROM foo WHERE string5 > '1'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') > '1'",
"SELECT SUM(long1) FROM druid.nested WHERE string5 > '1'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string5') > '1'",
// 42, 43 big cardinality like predicate filter
"SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'",
"SELECT SUM(long1) FROM druid.nested WHERE string5 LIKE '%1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'",
// 44, 45 big cardinality like filter + selector filter with different ordering
"SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND string1 = '1000'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%' AND JSON_VALUE(nested, '$.nesteder.string1') = '1000'",
"SELECT SUM(long1) FROM foo WHERE string1 = '1000' AND string5 LIKE '%1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string1') = '1000' AND JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'",
"SELECT SUM(long1) FROM druid.nested WHERE string5 LIKE '%1%' AND string1 = '1000'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%' AND JSON_VALUE(nested, '$.nesteder.string1') = '1000'",
"SELECT SUM(long1) FROM druid.nested WHERE string1 = '1000' AND string5 LIKE '%1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string1') = '1000' AND JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'",
// 48, 49: bigger IN filter
"SELECT long2 FROM foo WHERE long2 IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204)",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204)",
"SELECT long2 FROM druid.nested WHERE long2 IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204)",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204)",
// 50, 51: bigger IN filter with GROUP BY, plus double-typed IN variants
"SELECT long2 FROM foo WHERE long2 IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204) GROUP BY 1",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204) GROUP BY 1",
"SELECT long2 FROM foo WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)",
"SELECT long2 FROM foo WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1"
"SELECT long2 FROM druid.nested WHERE long2 IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204) GROUP BY 1",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204) GROUP BY 1",
"SELECT long2 FROM druid.nested WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)",
"SELECT long2 FROM druid.nested WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1"
);
@Param({"5000000"})
private int rowsPerSegment;
@Param({
"false",
"force"
})
private String vectorize;
@Param({
"none",
"front-coded-4",
"front-coded-16"
})
private String stringEncoding;
@Param({
"explicit",
"auto"
})
private String schema;
@Param({
"0",
@ -298,179 +183,15 @@ public class SqlNestedDataBenchmark
})
private String query;
private SqlEngine engine;
@Nullable
private PlannerFactory plannerFactory;
private final Closer closer = Closer.create();
@Setup(Level.Trial)
public void setup()
@Override
public String getQuery()
{
final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench");
final DataSegment dataSegment = DataSegment.builder()
.dataSource("foo")
.interval(schemaInfo.getDataInterval())
.version("1")
.shardSpec(new LinearShardSpec(0))
.size(0)
.build();
final PlannerConfig plannerConfig = new PlannerConfig();
final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
log.info("Starting benchmark setup using cacheDir[%s], rows[%,d].", segmentGenerator.getCacheDir(), rowsPerSegment);
TransformSpec transformSpec = new TransformSpec(
null,
ImmutableList.of(
new ExpressionTransform(
"nested",
"json_object('long1', long1, 'nesteder', json_object('string1', string1, 'long2', long2, 'double3',double3, 'string5', string5))",
TestExprMacroTable.INSTANCE
)
)
);
StringEncodingStrategy encodingStrategy;
if (stringEncoding.startsWith("front-coded")) {
String[] split = stringEncoding.split("-");
int bucketSize = Integer.parseInt(split[2]);
encodingStrategy = new StringEncodingStrategy.FrontCoded(bucketSize, FrontCodedIndexed.V1);
} else {
encodingStrategy = new StringEncodingStrategy.Utf8();
}
final QueryableIndex index;
if ("auto".equals(schema)) {
Iterable<DimensionSchema> columnSchemas = Iterables.concat(
schemaInfo.getDimensionsSpec()
.getDimensions()
.stream()
.map(x -> new AutoTypeColumnSchema(x.getName(), null))
.collect(Collectors.toList()),
Collections.singletonList(new AutoTypeColumnSchema("nested", null))
);
index = segmentGenerator.generate(
dataSegment,
schemaInfo,
DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
transformSpec,
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
Granularities.NONE,
rowsPerSegment
);
} else {
Iterable<DimensionSchema> columnSchemas = Iterables.concat(
schemaInfo.getDimensionsSpec().getDimensions(),
Collections.singletonList(new AutoTypeColumnSchema("nested", null))
);
index = segmentGenerator.generate(
dataSegment,
schemaInfo,
DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
transformSpec,
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
Granularities.NONE,
rowsPerSegment
);
}
final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(
closer,
PROCESSING_CONFIG
);
final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate).add(
dataSegment,
index
);
closer.register(walker);
final DruidSchemaCatalog rootSchema =
CalciteTests.createMockRootSchema(conglomerate, walker, plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER);
engine = CalciteTests.createMockSqlEngine(walker, conglomerate);
plannerFactory = new PlannerFactory(
rootSchema,
CalciteTests.createOperatorTable(),
CalciteTests.createExprMacroTable(),
plannerConfig,
AuthTestUtils.TEST_AUTHORIZER_MAPPER,
CalciteTests.getJsonMapper(),
CalciteTests.DRUID_SCHEMA_NAME,
new CalciteRulesManager(ImmutableSet.of()),
CalciteTests.createJoinableFactoryWrapper(),
CatalogResolver.NULL_RESOLVER,
new AuthConfig(),
new DruidHookDispatcher()
);
try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
engine,
plannerFactory,
QUERIES.get(Integer.parseInt(query))
);
log.info("non-vectorized and vectorized results match");
}
catch (Throwable ex) {
log.warn(ex, "non-vectorized and vectorized results do not match");
}
final String sql = QUERIES.get(Integer.parseInt(query));
final ObjectMapper jsonMapper = CalciteTests.getJsonMapper();
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Object[] planResult = resultSequence.toList().get(0);
log.info("Native query plan:\n" +
jsonMapper.writerWithDefaultPrettyPrinter()
.writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
);
}
catch (JsonProcessingException ex) {
log.warn(ex, "explain failed");
}
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Yielder<Object[]> yielder = Yielders.each(resultSequence);
int rowCounter = 0;
while (!yielder.isDone()) {
rowCounter++;
yielder.next(yielder.get());
}
log.info("Total result row count:" + rowCounter);
}
catch (Throwable ex) {
log.warn(ex, "failed to count rows");
}
return QUERIES.get(Integer.parseInt(query));
}
@TearDown(Level.Trial)
public void tearDown() throws Exception
@Override
public List<String> getDatasources()
{
closer.close();
}
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void querySql(Blackhole blackhole)
{
final Map<String, Object> context = ImmutableMap.of(
QueryContexts.VECTORIZE_KEY, vectorize,
QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize
);
final String sql = QUERIES.get(Integer.parseInt(query));
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in);
blackhole.consume(lastRow);
}
return ImmutableList.of(SqlBenchmarkDatasets.NESTED);
}
}
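Editor's note: the JSON_VALUE paths in the query list above ('$.long1', '$.nesteder.string1', and so on) address the "nested" column built by the json_object transform in the removed setup code. A minimal sketch of that per-row shape, with illustrative sample values (the real values come from the generator's long1/string1/long2/double3/string5 columns):

import com.google.common.collect.ImmutableMap;
import java.util.Map;

class NestedColumnShapeSketch
{
  // Sample values only; this mirrors json_object('long1', long1, 'nesteder',
  // json_object('string1', string1, 'long2', long2, 'double3', double3, 'string5', string5)).
  static final Map<String, Object> SAMPLE_NESTED_ROW = ImmutableMap.of(
      "long1", 1234L,
      "nesteder", ImmutableMap.of(
          "string1", "1000",
          "long2", 19L,
          "double3", 1001.5,
          "string5", "1111111"
      )
  );
}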

View File

@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.benchmark.query;
import com.google.common.collect.ImmutableList;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.util.List;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
public class SqlPlanBenchmark extends SqlBasePlanBenchmark
{
@Param({
"0",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
"10",
"11",
"12",
"13",
"14",
"15",
"16",
"17",
"18",
"19",
"20",
"21",
"22",
"23",
"24",
"25",
"26",
"27",
"28",
"29",
"30",
"31",
"32",
"33",
"34",
"35",
"36",
"37",
"38",
"39",
"40",
"41"
})
private String query;
@Override
public String getQuery()
{
return SqlBenchmark.QUERIES.get(Integer.parseInt(query));
}
@Override
public List<String> getDatasources()
{
return ImmutableList.of(SqlBenchmarkDatasets.BASIC);
}
}

View File

@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.benchmark.query;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.query.QueryContexts;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.util.List;
import java.util.Map;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
public class SqlProjectionsBenchmark extends SqlBaseQueryBenchmark
{
private static final List<String> QUERIES = ImmutableList.of(
"SELECT string2, APPROX_COUNT_DISTINCT_DS_HLL(string5) FROM druid.projections GROUP BY 1 ORDER BY 2",
"SELECT string2, SUM(long4) FROM druid.projections GROUP BY 1 ORDER BY 2"
);
@Param({
"0",
"1"
})
private String query;
@Param({
"true",
"false"
})
private boolean useProjections;
@Override
public String getQuery()
{
return QUERIES.get(Integer.parseInt(query));
}
@Override
public List<String> getDatasources()
{
return ImmutableList.of(SqlBenchmarkDatasets.PROJECTIONS);
}
@Override
protected Map<String, Object> getContext()
{
final Map<String, Object> context = ImmutableMap.of(
QueryContexts.VECTORIZE_KEY, vectorize,
QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize,
useProjections ? QueryContexts.FORCE_PROJECTION : QueryContexts.NO_PROJECTIONS, true
);
return context;
}
}

View File

@ -19,98 +19,22 @@
package org.apache.druid.benchmark.query;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.google.inject.multibindings.MapBinder;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.frame.FrameType;
import org.apache.druid.frame.testutil.FrameTestUtil;
import org.apache.druid.guice.ExpressionModule;
import org.apache.druid.guice.LazySingleton;
import org.apache.druid.guice.SegmentWranglerModule;
import org.apache.druid.guice.StartupInjectorBuilder;
import org.apache.druid.guice.annotations.Json;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.DruidProcessingConfig;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctSqlAggregator;
import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctUtf8SqlAggregator;
import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchApproxQuantileSqlAggregator;
import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchObjectSqlAggregator;
import org.apache.druid.query.aggregation.datasketches.theta.sql.ThetaSketchApproxCountDistinctSqlAggregator;
import org.apache.druid.query.lookup.LookupExtractor;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.QueryableIndexCursorFactory;
import org.apache.druid.segment.QueryableIndexSegment;
import org.apache.druid.segment.column.StringEncodingStrategy;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator;
import org.apache.druid.sql.calcite.aggregation.SqlAggregationModule;
import org.apache.druid.sql.calcite.aggregation.SqlAggregator;
import org.apache.druid.sql.calcite.aggregation.builtin.CountSqlAggregator;
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.QueryLookupOperatorConversion;
import org.apache.druid.sql.calcite.planner.CalciteRulesManager;
import org.apache.druid.sql.calcite.planner.CatalogResolver;
import org.apache.druid.sql.calcite.planner.DruidOperatorTable;
import org.apache.druid.sql.calcite.planner.DruidPlanner;
import org.apache.druid.sql.calcite.planner.PlannerConfig;
import org.apache.druid.sql.calcite.planner.PlannerFactory;
import org.apache.druid.sql.calcite.planner.PlannerResult;
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.calcite.util.LookylooModule;
import org.apache.druid.sql.calcite.util.QueryFrameworkUtils;
import org.apache.druid.sql.calcite.util.testoperator.CalciteTestOperatorModule;
import org.apache.druid.sql.hook.DruidHookDispatcher;
import org.apache.druid.timeline.DataSegment;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
/**
* Benchmark that tests various SQL queries.
@ -121,319 +45,55 @@ import java.util.stream.Collectors;
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
public class SqlWindowFunctionsBenchmark
public class SqlWindowFunctionsBenchmark extends SqlBaseQueryBenchmark
{
static {
NullHandling.initializeForTests();
}
private static final List<String> QUERIES = ImmutableList.of(
"SELECT SUM(dimSequentialHalfNull) FROM druid.basic GROUP BY dimUniform",
"SELECT SUM(SUM(dimSequentialHalfNull)) OVER (ORDER BY dimUniform) FROM druid.basic GROUP BY dimUniform",
"SELECT ROW_NUMBER() OVER (PARTITION BY dimUniform ORDER BY dimSequential) FROM druid.basic",
"SELECT COUNT(*) OVER (PARTITION BY dimUniform RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM druid.basic",
"SELECT COUNT(*) OVER (PARTITION BY dimUniform ORDER BY dimSequential RANGE UNBOUNDED PRECEDING) FROM druid.basic",
"SELECT COUNT(*) OVER (PARTITION BY dimUniform ORDER BY dimSequential RANGE UNBOUNDED FOLLOWING) FROM druid.basic",
"SELECT COUNT(*) OVER (PARTITION BY dimUniform ORDER BY dimSequential) FROM druid.basic GROUP BY dimSequential, dimUniform",
"SELECT COUNT(*) OVER (PARTITION BY dimUniform ORDER BY dimSequential) FROM druid.basic GROUP BY dimUniform, dimSequential",
"SELECT SUM(dimSequentialHalfNull) + SUM(dimZipf), LAG(SUM(dimSequentialHalfNull + dimZipf)) OVER (PARTITION BY dimUniform ORDER BY dimSequential) FROM druid.basic GROUP BY __time, dimUniform, dimSequential"
);
private static final Logger log = new Logger(SqlWindowFunctionsBenchmark.class);
private static final String STORAGE_MMAP = "mmap";
private static final String STORAGE_FRAME_ROW = "frame-row";
private static final String STORAGE_FRAME_COLUMNAR = "frame-columnar";
@Param({"2000000"})
private int rowsPerSegment;
@Param({
"auto"
"0",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8"
})
private String schema;
private String query;
// Can be STORAGE_MMAP, STORAGE_FRAME_ROW, or STORAGE_FRAME_COLUMNAR
@Param({STORAGE_MMAP})
private String storageType;
private SqlEngine engine;
@Nullable
private PlannerFactory plannerFactory;
private final Closer closer = Closer.create();
private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig() {
@Override
public int getNumMergeBuffers()
{
return 3;
}
@Override
public int intermediateComputeSizeBytes()
{
return 200_000_000;
}
};
@Setup(Level.Trial)
public void setup()
@Override
public String getQuery()
{
final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
final DataSegment dataSegment = schemaInfo.makeSegmentDescriptor("foo");
final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
log.info("Starting benchmark setup using cacheDir[%s], rows[%,d].", segmentGenerator.getCacheDir(), rowsPerSegment);
final QueryableIndex index;
if ("auto".equals(schema)) {
List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
.getDimensions()
.stream()
.map(x -> new AutoTypeColumnSchema(x.getName(), null))
.collect(Collectors.toList());
index = segmentGenerator.generate(
dataSegment,
schemaInfo,
DimensionsSpec.builder().setDimensions(columnSchemas).build(),
TransformSpec.NONE,
IndexSpec.builder().withStringDictionaryEncoding(getStringEncodingStrategy()).build(),
Granularities.NONE,
rowsPerSegment
);
} else {
index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment);
}
final Pair<PlannerFactory, SqlEngine> sqlSystem = createSqlSystem(
ImmutableMap.of(dataSegment, index),
Collections.emptyMap(),
null,
closer
);
plannerFactory = sqlSystem.lhs;
engine = sqlSystem.rhs;
return QUERIES.get(Integer.parseInt(query));
}
private StringEncodingStrategy getStringEncodingStrategy()
@Override
public List<String> getDatasources()
{
return new StringEncodingStrategy.Utf8();
return ImmutableList.of(SqlBenchmarkDatasets.BASIC);
}
public static Pair<PlannerFactory, SqlEngine> createSqlSystem(
final Map<DataSegment, QueryableIndex> segmentMap,
final Map<String, LookupExtractor> lookupMap,
@Nullable final String storageType,
final Closer closer
)
@Override
protected Map<String, Object> getContext()
{
final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(closer, PROCESSING_CONFIG);
final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate);
final PlannerConfig plannerConfig = new PlannerConfig();
for (final Map.Entry<DataSegment, QueryableIndex> segmentEntry : segmentMap.entrySet()) {
addSegmentToWalker(walker, segmentEntry.getKey(), segmentEntry.getValue(), storageType);
}
// Child injector that adds additional lookups.
final Injector injector = new StartupInjectorBuilder()
.withEmptyProperties()
.add(
new ExpressionModule(),
new SegmentWranglerModule(),
new LookylooModule(),
new SqlAggregationModule(),
new CalciteTestOperatorModule(),
binder -> {
for (Map.Entry<String, LookupExtractor> entry : lookupMap.entrySet()) {
MapBinder.newMapBinder(binder, String.class, LookupExtractor.class)
.addBinding(entry.getKey())
.toProvider(entry::getValue)
.in(LazySingleton.class);
}
}
)
.build();
final DruidSchemaCatalog rootSchema =
QueryFrameworkUtils.createMockRootSchema(
injector,
conglomerate,
walker,
plannerConfig,
AuthTestUtils.TEST_AUTHORIZER_MAPPER
);
final SqlEngine engine = CalciteTests.createMockSqlEngine(walker, conglomerate);
final PlannerFactory plannerFactory = new PlannerFactory(
rootSchema,
createOperatorTable(injector),
injector.getInstance(ExprMacroTable.class),
plannerConfig,
AuthTestUtils.TEST_AUTHORIZER_MAPPER,
injector.getInstance(Key.get(ObjectMapper.class, Json.class)),
CalciteTests.DRUID_SCHEMA_NAME,
new CalciteRulesManager(ImmutableSet.of()),
new JoinableFactoryWrapper(QueryFrameworkUtils.createDefaultJoinableFactory(injector)),
CatalogResolver.NULL_RESOLVER,
new AuthConfig(),
new DruidHookDispatcher()
);
return Pair.of(plannerFactory, engine);
}
private static void addSegmentToWalker(
final SpecificSegmentsQuerySegmentWalker walker,
final DataSegment descriptor,
final QueryableIndex index,
@Nullable final String storageType
)
{
if (storageType == null || STORAGE_MMAP.equals(storageType)) {
walker.add(descriptor, new QueryableIndexSegment(index, descriptor.getId()));
} else if (STORAGE_FRAME_ROW.equals(storageType)) {
walker.add(
descriptor,
FrameTestUtil.cursorFactoryToFrameSegment(
new QueryableIndexCursorFactory(index),
FrameType.ROW_BASED,
descriptor.getId()
)
);
} else if (STORAGE_FRAME_COLUMNAR.equals(storageType)) {
walker.add(
descriptor,
FrameTestUtil.cursorFactoryToFrameSegment(
new QueryableIndexCursorFactory(index),
FrameType.COLUMNAR,
descriptor.getId()
)
);
} else {
throw new IAE("Invalid storageType[%s]", storageType);
}
}
private static DruidOperatorTable createOperatorTable(final Injector injector)
{
try {
final Set<SqlOperatorConversion> extractionOperators = new HashSet<>();
extractionOperators.add(injector.getInstance(QueryLookupOperatorConversion.class));
final ApproxCountDistinctSqlAggregator countDistinctSqlAggregator =
new ApproxCountDistinctSqlAggregator(new HllSketchApproxCountDistinctSqlAggregator());
final Set<SqlAggregator> aggregators = new HashSet<>(
ImmutableList.of(
new DoublesSketchApproxQuantileSqlAggregator(),
new DoublesSketchObjectSqlAggregator(),
new HllSketchApproxCountDistinctSqlAggregator(),
new HllSketchApproxCountDistinctUtf8SqlAggregator(),
new ThetaSketchApproxCountDistinctSqlAggregator(),
new CountSqlAggregator(countDistinctSqlAggregator),
countDistinctSqlAggregator
)
);
return new DruidOperatorTable(aggregators, extractionOperators);
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
@TearDown(Level.Trial)
public void tearDown() throws Exception
{
closer.close();
}
public void querySql(String sql, Blackhole blackhole)
{
final Map<String, Object> context = ImmutableMap.of(
return ImmutableMap.of(
QueryContexts.MAX_SUBQUERY_BYTES_KEY, "disabled",
QueryContexts.MAX_SUBQUERY_ROWS_KEY, -1
QueryContexts.MAX_SUBQUERY_ROWS_KEY, -1,
QueryContexts.VECTORIZE_KEY, vectorize,
QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize
);
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in);
blackhole.consume(lastRow);
}
}
@Benchmark
public void groupByWithoutWindow(Blackhole blackhole)
{
String sql = "SELECT SUM(dimSequentialHalfNull) "
+ "FROM foo "
+ "GROUP BY dimUniform";
querySql(sql, blackhole);
}
@Benchmark
public void groupByWithWindow(Blackhole blackhole)
{
String sql = "SELECT SUM(SUM(dimSequentialHalfNull)) "
+ "OVER (ORDER BY dimUniform) "
+ "FROM foo "
+ "GROUP BY dimUniform";
querySql(sql, blackhole);
}
@Benchmark
public void simpleWindow(Blackhole blackhole)
{
String sql = "SELECT ROW_NUMBER() "
+ "OVER (PARTITION BY dimUniform ORDER BY dimSequential) "
+ "FROM foo";
querySql(sql, blackhole);
}
@Benchmark
public void simpleWindowUnbounded(Blackhole blackhole)
{
String sql = "SELECT COUNT(*) "
+ "OVER (PARTITION BY dimUniform RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) "
+ "FROM foo";
querySql(sql, blackhole);
}
@Benchmark
public void windowTillCurrentRow(Blackhole blackhole)
{
String sql = "SELECT COUNT(*) "
+ "OVER (PARTITION BY dimUniform ORDER BY dimSequential RANGE UNBOUNDED PRECEDING) "
+ "FROM foo";
querySql(sql, blackhole);
}
@Benchmark
public void windowFromCurrentRow(Blackhole blackhole)
{
String sql = "SELECT COUNT(*) "
+ "OVER (PARTITION BY dimUniform ORDER BY dimSequential RANGE UNBOUNDED FOLLOWING) "
+ "FROM foo";
querySql(sql, blackhole);
}
@Benchmark
public void windowWithSorter(Blackhole blackhole)
{
String sql = "SELECT COUNT(*) "
+ "OVER (PARTITION BY dimUniform ORDER BY dimSequential) "
+ "FROM foo "
+ "GROUP BY dimSequential, dimUniform";
querySql(sql, blackhole);
}
@Benchmark
public void windowWithoutSorter(Blackhole blackhole)
{
String sql = "SELECT COUNT(*) "
+ "OVER (PARTITION BY dimUniform ORDER BY dimSequential) "
+ "FROM foo "
+ "GROUP BY dimUniform, dimSequential";
querySql(sql, blackhole);
}
@Benchmark
public void windowWithGroupbyTime(Blackhole blackhole)
{
String sql = "SELECT "
+ "SUM(dimSequentialHalfNull) + SUM(dimHyperUnique), "
+ "LAG(SUM(dimSequentialHalfNull + dimHyperUnique)) OVER (PARTITION BY dimUniform ORDER BY dimSequential) "
+ "FROM foo "
+ "GROUP BY __time, dimUniform, dimSequential";
querySql(sql, blackhole);
}
}
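Editor's note: the per-benchmark querySql helpers and @Benchmark methods removed above all reduced to the same measured loop: plan the SQL, run it, and push the last row into the JMH blackhole. A rough sketch of that loop, using the planner APIs visible elsewhere in this diff; this is not the actual SqlBaseQueryBenchmark code, which presumably centralizes the equivalent logic:

import com.google.common.collect.ImmutableMap;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.sql.calcite.planner.DruidPlanner;
import org.apache.druid.sql.calcite.planner.PlannerFactory;
import org.apache.druid.sql.calcite.planner.PlannerResult;
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.openjdk.jmh.infra.Blackhole;

class MeasuredQueryLoopSketch
{
  // Plans and runs one SQL statement, consuming only the final row so JMH cannot
  // optimize the query away. Callers supply the planner factory and engine built during setup.
  static void runToLastRow(PlannerFactory plannerFactory, SqlEngine engine, String sql, Blackhole blackhole)
  {
    try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
      final PlannerResult plannerResult = planner.plan();
      final Sequence<Object[]> results = plannerResult.run().getResults();
      blackhole.consume(results.accumulate(null, (accumulated, in) -> in));
    }
  }
}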

View File

@ -26,7 +26,9 @@ import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.joda.time.Interval;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
public class GeneratorSchemaInfo
@ -64,6 +66,20 @@ public class GeneratorSchemaInfo
return new DimensionsSpec(specs);
}
public DimensionsSpec getDimensionsSpecExcludeAggs()
{
final Set<String> metricsInputs = new HashSet<>();
for (AggregatorFactory agg : aggs) {
metricsInputs.addAll(agg.requiredFields());
}
List<DimensionSchema> specs = getColumnSchemas().stream()
.filter(x -> !x.isMetric() && !metricsInputs.contains(x.getName()))
.map(GeneratorColumnSchema::getDimensionSchema)
.collect(Collectors.toList());
return new DimensionsSpec(specs);
}
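Editor's note: getDimensionsSpecExcludeAggs() drops not only metric columns but also the generator columns that aggregators read, so a column rolled up into a metric is not also ingested as a dimension. A small self-contained sketch of the requiredFields() check it relies on, using an illustrative doubleSum aggregator (the names here are made up for the example):

import com.google.common.collect.ImmutableList;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

class ExcludeAggInputsSketch
{
  // Collects every input column consumed by the aggregators; those columns are the ones
  // filtered out of the dimensions spec above.
  static Set<String> metricInputs(List<AggregatorFactory> aggs)
  {
    final Set<String> inputs = new HashSet<>();
    for (AggregatorFactory agg : aggs) {
      inputs.addAll(agg.requiredFields());
    }
    return inputs;
  }

  public static void main(String[] args)
  {
    // Prints [double3]: that column would be excluded from the dimensions spec.
    System.out.println(metricInputs(ImmutableList.of(new DoubleSumAggregatorFactory("sumOfDouble3", "double3"))));
  }
}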
public List<AggregatorFactory> getAggs()
{
return aggs;

View File

@ -67,6 +67,11 @@ public class TestHelper
return new IndexMergerV9(JSON_MAPPER, getTestIndexIO(), segmentWriteOutMediumFactory, true);
}
public static IndexMergerV9 getTestIndexMergerV9(ObjectMapper jsonMapper, SegmentWriteOutMediumFactory segmentWriteOutMediumFactory)
{
return new IndexMergerV9(jsonMapper, getTestIndexIO(jsonMapper), segmentWriteOutMediumFactory, true);
}
public static IndexMergerV9 getTestIndexMergerV9(SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, ColumnConfig columnConfig)
{
return new IndexMergerV9(JSON_MAPPER, getTestIndexIO(columnConfig), segmentWriteOutMediumFactory, true);
@ -82,6 +87,16 @@ public class TestHelper
return new IndexIO(JSON_MAPPER, columnConfig);
}
public static IndexIO getTestIndexIO(ObjectMapper jsonMapper, ColumnConfig columnConfig)
{
return new IndexIO(jsonMapper, columnConfig);
}
public static IndexIO getTestIndexIO(ObjectMapper jsonMapper)
{
return new IndexIO(jsonMapper, ColumnConfig.SELECTION_SIZE);
}
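Editor's note: these overloads let a benchmark thread a single ObjectMapper through both segment persistence and loading, instead of registering serde modules on two separate mappers. A minimal usage sketch, assuming TestHelper.makeJsonMapper() as the source of the shared mapper:

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.segment.IndexIO;
import org.apache.druid.segment.IndexMergerV9;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;

class SharedMapperSketch
{
  static void sketch()
  {
    // One mapper for everything: register any extra modules on it once, then reuse it
    // both for merging segments and for reading them back.
    final ObjectMapper mapper = TestHelper.makeJsonMapper();
    final IndexMergerV9 merger =
        TestHelper.getTestIndexMergerV9(mapper, OffHeapMemorySegmentWriteOutMediumFactory.instance());
    final IndexIO indexIO = TestHelper.getTestIndexIO(mapper);
  }
}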
public static AnnotationIntrospector makeAnnotationIntrospector()
{
// Prepare annotationIntrospector with similar logic, except skip Guice loading

View File

@ -19,10 +19,12 @@
package org.apache.druid.segment.generator;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.hash.Hashing;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.impl.AggregateProjectionSpec;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.MapInputRowParser;
import org.apache.druid.data.input.impl.TimestampSpec;
@ -55,6 +57,7 @@ import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@ -114,7 +117,16 @@ public class SegmentGenerator implements Closeable
final int numRows
)
{
return generate(dataSegment, schemaInfo, schemaInfo.getDimensionsSpec(), TransformSpec.NONE, IndexSpec.DEFAULT, granularity, numRows);
return generate(
dataSegment,
schemaInfo,
schemaInfo.getDimensionsSpec(),
TransformSpec.NONE,
IndexSpec.DEFAULT,
granularity,
Collections.emptyList(),
numRows
);
}
public QueryableIndex generate(
@ -125,7 +137,16 @@ public class SegmentGenerator implements Closeable
final int numRows
)
{
return generate(dataSegment, schemaInfo, schemaInfo.getDimensionsSpec(), TransformSpec.NONE, indexSpec, granularity, numRows);
return generate(
dataSegment,
schemaInfo,
schemaInfo.getDimensionsSpec(),
TransformSpec.NONE,
indexSpec,
granularity,
Collections.emptyList(),
numRows
);
}
public QueryableIndex generate(
@ -137,6 +158,53 @@ public class SegmentGenerator implements Closeable
final Granularity queryGranularity,
final int numRows
)
{
return generate(
dataSegment,
schemaInfo,
dimensionsSpec,
transformSpec,
indexSpec,
queryGranularity,
Collections.emptyList(),
numRows
);
}
public QueryableIndex generate(
final DataSegment dataSegment,
final GeneratorSchemaInfo schemaInfo,
final DimensionsSpec dimensionsSpec,
final TransformSpec transformSpec,
final IndexSpec indexSpec,
final Granularity queryGranularity,
final List<AggregateProjectionSpec> projectionSpecs,
final int numRows
)
{
return generate(
dataSegment,
schemaInfo,
dimensionsSpec,
transformSpec,
indexSpec,
queryGranularity,
projectionSpecs,
numRows,
TestHelper.JSON_MAPPER
);
}
public QueryableIndex generate(
final DataSegment dataSegment,
final GeneratorSchemaInfo schemaInfo,
final DimensionsSpec dimensionsSpec,
final TransformSpec transformSpec,
final IndexSpec indexSpec,
final Granularity queryGranularity,
final List<AggregateProjectionSpec> projectionSpecs,
final int numRows,
final ObjectMapper jsonMapper
)
{
// In case we need to generate hyperUniques or json
ComplexMetrics.registerSerde(HyperUniquesSerde.TYPE_NAME, new HyperUniquesSerde());
@ -149,6 +217,8 @@ public class SegmentGenerator implements Closeable
.putString(dimensionsSpec.toString(), StandardCharsets.UTF_8)
.putString(queryGranularity.toString(), StandardCharsets.UTF_8)
.putString(indexSpec.toString(), StandardCharsets.UTF_8)
.putString(transformSpec.toString(), StandardCharsets.UTF_8)
.putString(projectionSpecs.toString(), StandardCharsets.UTF_8)
.putInt(numRows)
.hash()
.toString();
@ -158,7 +228,7 @@ public class SegmentGenerator implements Closeable
if (outDir.exists()) {
try {
log.info("Found segment with hash[%s] cached in directory[%s].", dataHash, outDir);
return TestHelper.getTestIndexIO(ColumnConfig.DEFAULT).loadIndex(outDir);
return TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT).loadIndex(outDir);
}
catch (IOException e) {
throw new RuntimeException(e);
@ -179,22 +249,23 @@ public class SegmentGenerator implements Closeable
.withMetrics(schemaInfo.getAggsArray())
.withRollup(schemaInfo.isWithRollup())
.withQueryGranularity(queryGranularity)
.withProjections(projectionSpecs)
.build();
final List<InputRow> rows = new ArrayList<>();
final List<QueryableIndex> indexes = new ArrayList<>();
Transformer transformer = transformSpec.toTransformer();
InputRowSchema rowSchema = new InputRowSchema(
final Transformer transformer = transformSpec.toTransformer();
final InputRowSchema rowSchema = new InputRowSchema(
new TimestampSpec(null, null, null),
dimensionsSpec,
null
);
for (int i = 0; i < numRows; i++) {
Map<String, Object> raw = dataGenerator.nextRaw();
InputRow inputRow = MapInputRowParser.parse(rowSchema, raw);
InputRow transformedRow = transformer.transform(inputRow);
final Map<String, Object> raw = dataGenerator.nextRaw();
final InputRow inputRow = MapInputRowParser.parse(rowSchema, raw);
final InputRow transformedRow = transformer.transform(inputRow);
rows.add(transformedRow);
if ((i + 1) % 20000 == 0) {
@ -202,7 +273,7 @@ public class SegmentGenerator implements Closeable
}
if (rows.size() % MAX_ROWS_IN_MEMORY == 0) {
indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema, indexSpec));
indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema, indexSpec, jsonMapper));
rows.clear();
}
}
@ -210,7 +281,7 @@ public class SegmentGenerator implements Closeable
log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment);
if (rows.size() > 0) {
indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema, indexSpec));
indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema, indexSpec, jsonMapper));
rows.clear();
}
@ -221,9 +292,9 @@ public class SegmentGenerator implements Closeable
} else {
try {
retVal = TestHelper
.getTestIndexIO(ColumnConfig.DEFAULT)
.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT)
.loadIndex(
TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance())
TestHelper.getTestIndexMergerV9(jsonMapper, OffHeapMemorySegmentWriteOutMediumFactory.instance())
.mergeQueryableIndex(
indexes,
false,
@ -256,7 +327,82 @@ public class SegmentGenerator implements Closeable
}
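Editor's note: putting the widened generate(...) signature together, callers can now pass a projection list and the same ObjectMapper the benchmark's planner uses. A minimal usage sketch with the "basic" generator schema, no projections, the default test mapper, and an arbitrary row count:

import java.util.Collections;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.timeline.DataSegment;

class SegmentGeneratorUsageSketch
{
  static QueryableIndex sketch(Closer closer)
  {
    final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    final DataSegment dataSegment = schemaInfo.makeSegmentDescriptor("foo");
    // Registered with the caller's Closer and closed at teardown, as the benchmarks do,
    // so the generated segment files outlive this method.
    final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
    return segmentGenerator.generate(
        dataSegment,
        schemaInfo,
        schemaInfo.getDimensionsSpec(),
        TransformSpec.NONE,
        IndexSpec.DEFAULT,
        Granularities.NONE,
        Collections.emptyList(),   // no AggregateProjectionSpecs in this sketch
        10_000,                    // arbitrary row count
        TestHelper.JSON_MAPPER
    );
  }
}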
public IncrementalIndex generateIncrementalIndex(
final DataSegment dataSegment,
final GeneratorSchemaInfo schemaInfo,
final DimensionsSpec dimensionsSpec,
final TransformSpec transformSpec,
final AggregatorFactory[] aggregatorFactories,
final IndexSpec indexSpec,
final Granularity queryGranularity,
final List<AggregateProjectionSpec> projectionSpecs,
final int numRows,
final ObjectMapper jsonMapper
)
{
// In case we need to generate hyperUniques or json
ComplexMetrics.registerSerde(HyperUniquesSerde.TYPE_NAME, new HyperUniquesSerde());
BuiltInTypesModule.registerHandlersAndSerde();
final String dataHash = Hashing.sha256()
.newHasher()
.putString(dataSegment.getId().toString(), StandardCharsets.UTF_8)
.putString(schemaInfo.toString(), StandardCharsets.UTF_8)
.putString(dimensionsSpec.toString(), StandardCharsets.UTF_8)
.putString(queryGranularity.toString(), StandardCharsets.UTF_8)
.putString(indexSpec.toString(), StandardCharsets.UTF_8)
.putInt(numRows)
.hash()
.toString();
log.info("Writing segment with hash[%s] to incremental index.", dataHash);
final DataGenerator dataGenerator = new DataGenerator(
schemaInfo.getColumnSchemas(),
dataSegment.getId().hashCode(), /* Use segment identifier hashCode as seed */
schemaInfo.getDataInterval(),
numRows
);
final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
.withDimensionsSpec(dimensionsSpec)
.withMetrics(aggregatorFactories)
.withRollup(schemaInfo.isWithRollup())
.withQueryGranularity(queryGranularity)
.withProjections(projectionSpecs)
.build();
final List<InputRow> rows = new ArrayList<>();
final Transformer transformer = transformSpec.toTransformer();
final InputRowSchema rowSchema = new InputRowSchema(
new TimestampSpec(null, null, null),
dimensionsSpec,
null
);
for (int i = 0; i < numRows; i++) {
final Map<String, Object> raw = dataGenerator.nextRaw();
final InputRow inputRow = MapInputRowParser.parse(rowSchema, raw);
final InputRow transformedRow = transformer.transform(inputRow);
rows.add(transformedRow);
if ((i + 1) % 20000 == 0) {
log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment);
}
}
log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment);
return IndexBuilder
.create(jsonMapper)
.schema(indexSchema)
.tmpDir(new File(getSegmentDir(dataSegment.getId(), dataHash), ""))
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
.rows(rows)
.buildIncrementalIndex();
}
public IncrementalIndex generateIncrementalIndex(
final DataSegment dataSegment,
final GeneratorSchemaInfo schemaInfo,
final Granularity granularity,
@ -320,11 +466,12 @@ public class SegmentGenerator implements Closeable
final int indexNumber,
final List<InputRow> rows,
final IncrementalIndexSchema indexSchema,
final IndexSpec indexSpec
final IndexSpec indexSpec,
final ObjectMapper jsonMapper
)
{
return IndexBuilder
.create()
.create(jsonMapper)
.schema(indexSchema)
.indexSpec(indexSpec)
.tmpDir(new File(getSegmentDir(identifier, dataHash), String.valueOf(indexNumber)))