diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/lookup/SqlReverseLookupBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/lookup/SqlReverseLookupBenchmark.java index 9db22a2ec59..38bcfdea0ee 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/lookup/SqlReverseLookupBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/lookup/SqlReverseLookupBenchmark.java @@ -20,7 +20,7 @@ package org.apache.druid.benchmark.lookup; import com.google.common.collect.ImmutableMap; -import org.apache.druid.benchmark.query.SqlBenchmark; +import org.apache.druid.benchmark.query.SqlBaseBenchmark; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; @@ -54,6 +54,7 @@ import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.infra.Blackhole; import javax.annotation.Nullable; +import java.util.Collections; import java.util.concurrent.TimeUnit; /** @@ -111,10 +112,11 @@ public class SqlReverseLookupBenchmark final QueryableIndex index = segmentGenerator.generate(dataSegment, schemaInfo, IndexSpec.DEFAULT, Granularities.NONE, 1); - final Pair sqlSystem = SqlBenchmark.createSqlSystem( + final Pair sqlSystem = SqlBaseBenchmark.createSqlSystem( ImmutableMap.of(dataSegment, index), + Collections.emptyMap(), ImmutableMap.of("benchmark-lookup", lookup), - null, + SqlBaseBenchmark.BenchmarkStorage.MMAP, closer ); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBaseBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBaseBenchmark.java new file mode 100644 index 00000000000..430a6167c33 --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBaseBenchmark.java @@ -0,0 +1,559 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.benchmark.query; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.inject.Injector; +import com.google.inject.Key; +import com.google.inject.multibindings.MapBinder; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.frame.FrameType; +import org.apache.druid.frame.read.FrameReader; +import org.apache.druid.frame.segment.FrameSegment; +import org.apache.druid.frame.testutil.FrameTestUtil; +import org.apache.druid.guice.ExpressionModule; +import org.apache.druid.guice.LazySingleton; +import org.apache.druid.guice.SegmentWranglerModule; +import org.apache.druid.guice.StartupInjectorBuilder; +import org.apache.druid.guice.annotations.Json; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.math.expr.ExpressionProcessing; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.QueryRunnerFactoryConglomerate; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchModule; +import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctSqlAggregator; +import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctUtf8SqlAggregator; +import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchEstimateOperatorConversion; +import org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchModule; +import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchApproxQuantileSqlAggregator; +import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchObjectSqlAggregator; +import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchQuantileOperatorConversion; +import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchQuantilesOperatorConversion; +import org.apache.druid.query.aggregation.datasketches.theta.SketchModule; +import org.apache.druid.query.aggregation.datasketches.theta.sql.ThetaSketchApproxCountDistinctSqlAggregator; +import org.apache.druid.query.aggregation.datasketches.theta.sql.ThetaSketchEstimateOperatorConversion; +import org.apache.druid.query.aggregation.datasketches.tuple.ArrayOfDoublesSketchModule; +import org.apache.druid.query.lookup.LookupExtractor; +import org.apache.druid.segment.IncrementalIndexSegment; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.PhysicalSegmentInspector; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexCursorFactory; +import org.apache.druid.segment.QueryableIndexPhysicalSegmentInspector; +import org.apache.druid.segment.QueryableIndexSegment; +import org.apache.druid.segment.column.StringEncodingStrategy; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.data.FrontCodedIndexed; +import org.apache.druid.segment.generator.SegmentGenerator; +import org.apache.druid.segment.incremental.IncrementalIndex; +import 
org.apache.druid.segment.join.JoinableFactoryWrapper; +import org.apache.druid.server.QueryStackTests; +import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker; +import org.apache.druid.server.security.AuthConfig; +import org.apache.druid.server.security.AuthTestUtils; +import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest; +import org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator; +import org.apache.druid.sql.calcite.aggregation.SqlAggregationModule; +import org.apache.druid.sql.calcite.aggregation.SqlAggregator; +import org.apache.druid.sql.calcite.aggregation.builtin.CountSqlAggregator; +import org.apache.druid.sql.calcite.expression.SqlOperatorConversion; +import org.apache.druid.sql.calcite.expression.builtin.QueryLookupOperatorConversion; +import org.apache.druid.sql.calcite.planner.CalciteRulesManager; +import org.apache.druid.sql.calcite.planner.CatalogResolver; +import org.apache.druid.sql.calcite.planner.DruidOperatorTable; +import org.apache.druid.sql.calcite.planner.DruidPlanner; +import org.apache.druid.sql.calcite.planner.PlannerConfig; +import org.apache.druid.sql.calcite.planner.PlannerFactory; +import org.apache.druid.sql.calcite.planner.PlannerResult; +import org.apache.druid.sql.calcite.run.SqlEngine; +import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog; +import org.apache.druid.sql.calcite.util.CalciteTests; +import org.apache.druid.sql.calcite.util.LookylooModule; +import org.apache.druid.sql.calcite.util.QueryFrameworkUtils; +import org.apache.druid.sql.calcite.util.testoperator.CalciteTestOperatorModule; +import org.apache.druid.sql.hook.DruidHookDispatcher; +import org.apache.druid.timeline.DataSegment; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + + +@State(Scope.Benchmark) +public class SqlBaseBenchmark +{ + private static final Logger log = new Logger(SqlBaseBenchmark.class); + private static final String STORAGE_MMAP = "MMAP"; + private static final String STORAGE_INCREMENTAL = "INCREMENTAL"; + private static final String STORAGE_FRAME_ROW = "FRAME_ROW"; + private static final String STORAGE_FRAME_COLUMNAR = "FRAME_COLUMNAR"; + + static { + NullHandling.initializeForTests(); + ExpressionProcessing.initializeForTests(); + HllSketchModule.registerSerde(); + SketchModule.registerSerde(); + DoublesSketchModule.registerSerde(); + + CalciteTests.getJsonMapper() + .registerModules(new HllSketchModule().getJacksonModules()) + .registerModules(new SketchModule().getJacksonModules()) + .registerModules(new DoublesSketchModule().getJacksonModules()) + .registerModules(new ArrayOfDoublesSketchModule().getJacksonModules()); + } + + public enum BenchmarkStorage + { + MMAP, + INCREMENTAL, + FRAME_COLUMNAR, + FRAME_ROW + } + + public enum BenchmarkStringEncodingStrategy + { + UTF8, + FRONT_CODED_DEFAULT_V1, + FRONT_CODED_16_V1 + } + + + @Param({"1500000"}) + protected int rowsPerSegment; + + @Param({ + "false", + "force" + }) + protected String vectorize; + + @Param({ + "UTF8", + "FRONT_CODED_DEFAULT_V1", + "FRONT_CODED_16_V1" + }) + protected BenchmarkStringEncodingStrategy 
stringEncoding; + + @Param({ + "none", + "lz4" + }) + protected String complexCompression; + + @Param({ + "explicit", + "auto" + }) + protected String schemaType; + + // Can be STORAGE_MMAP, STORAGE_INCREMENTAL, STORAGE_FRAME_ROW, or STORAGE_FRAME_COLUMNAR + @Param({ + STORAGE_MMAP, + STORAGE_INCREMENTAL, + STORAGE_FRAME_ROW, + STORAGE_FRAME_COLUMNAR + }) + protected BenchmarkStorage storageType; + + protected SqlEngine engine; + @Nullable + protected PlannerFactory plannerFactory; + private final Closer closer = Closer.create(); + + protected QueryContexts.Vectorize vectorizeContext; + + + public String getQuery() + { + throw new UnsupportedOperationException("getQuery not implemented"); + } + + public List getDatasources() + { + throw new UnsupportedOperationException("getDatasources not implemented"); + } + + protected Map getContext() + { + final Map context = ImmutableMap.of( + QueryContexts.VECTORIZE_KEY, vectorize, + QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize + ); + return context; + } + + protected IndexSpec getIndexSpec() + { + return IndexSpec.builder() + .withStringDictionaryEncoding(getStringEncodingStrategy()) + .withComplexMetricCompression( + CompressionStrategy.valueOf(StringUtils.toUpperCase(complexCompression)) + ) + .build(); + } + + @Setup(Level.Trial) + public void setup() throws JsonProcessingException + { + vectorizeContext = QueryContexts.Vectorize.fromString(vectorize); + checkIncompatibleParameters(); + + Map realtimeSegments = new HashMap<>(); + Map segments = new HashMap<>(); + for (String dataSource : getDatasources()) { + final SqlBenchmarkDatasets.BenchmarkSchema schema; + if ("auto".equals(schemaType)) { + schema = SqlBenchmarkDatasets.getSchema(dataSource).asAutoDimensions(); + } else { + schema = SqlBenchmarkDatasets.getSchema(dataSource); + } + + for (DataSegment dataSegment : schema.getDataSegments()) { + final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); + log.info( + "Starting benchmark setup using cacheDir[%s], rows[%,d].", + segmentGenerator.getCacheDir(), + rowsPerSegment + ); + + if (BenchmarkStorage.INCREMENTAL == storageType) { + final IncrementalIndex index = segmentGenerator.generateIncrementalIndex( + dataSegment, + schema.getGeneratorSchemaInfo(), + schema.getDimensionsSpec(), + schema.getTransformSpec(), + schema.getAggregators(), + getIndexSpec(), + schema.getQueryGranularity(), + schema.getProjections(), + rowsPerSegment, + CalciteTests.getJsonMapper() + ); + log.info( + "Segment metadata: %s", + CalciteTests.getJsonMapper().writerWithDefaultPrettyPrinter().writeValueAsString(index.getMetadata()) + ); + realtimeSegments.put(dataSegment, index); + } else { + final QueryableIndex index = segmentGenerator.generate( + dataSegment, + schema.getGeneratorSchemaInfo(), + schema.getDimensionsSpec(), + schema.getTransformSpec(), + getIndexSpec(), + schema.getQueryGranularity(), + schema.getProjections(), + rowsPerSegment, + CalciteTests.getJsonMapper() + ); + log.info( + "Segment metadata: %s", + CalciteTests.getJsonMapper().writerWithDefaultPrettyPrinter().writeValueAsString(index.getMetadata()) + ); + segments.put(dataSegment, index); + } + } + } + + final Pair sqlSystem = createSqlSystem( + segments, + realtimeSegments, + Collections.emptyMap(), + storageType, + closer + ); + + plannerFactory = sqlSystem.lhs; + engine = sqlSystem.rhs; + final ObjectMapper jsonMapper = CalciteTests.getJsonMapper(); + try (final DruidPlanner planner = plannerFactory.createPlannerForTesting( + engine, + "EXPLAIN PLAN FOR " 
+ getQuery(), + ImmutableMap.builder() + .putAll(getContext()) + .put( + "useNativeQueryExplain", + true + ) + .build() + )) { + final PlannerResult plannerResult = planner.plan(); + final Sequence resultSequence = plannerResult.run().getResults(); + final Object[] planResult = resultSequence.toList().get(0); + log.info("Native query plan:\n" + + jsonMapper.writerWithDefaultPrettyPrinter() + .writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class)) + ); + } + catch (JsonProcessingException ex) { + log.warn(ex, "explain failed"); + } + + try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, getQuery(), getContext())) { + final PlannerResult plannerResult = planner.plan(); + final Sequence resultSequence = plannerResult.run().getResults(); + final int rowCount = resultSequence.toList().size(); + log.info("Total result row count:" + rowCount); + } + catch (Throwable ex) { + log.warn(ex, "failed to count rows"); + } + + + if (vectorizeContext.shouldVectorize(true)) { + try { + SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries( + engine, + plannerFactory, + getQuery() + ); + log.info("non-vectorized and vectorized results match"); + } + catch (Throwable ex) { + log.warn(ex, "non-vectorized and vectorized results do not match"); + } + } + } + + private void checkIncompatibleParameters() + { + // string encodings other than UTF8 only apply to MMAP segments; skip incompatible combinations (note: with fork 0, System.exit ends the whole benchmark run) + if (stringEncoding != BenchmarkStringEncodingStrategy.UTF8 && storageType != BenchmarkStorage.MMAP) { + System.exit(0); + } + // complex compression only applies to mmap segments, don't bother otherwise + if (!"none".equals(complexCompression) && storageType != BenchmarkStorage.MMAP) { + System.exit(0); + } + // vectorize only works for mmap and frame columnar segments, bail out otherwise + if (vectorizeContext.shouldVectorize(true) && !(storageType == BenchmarkStorage.MMAP || storageType == BenchmarkStorage.FRAME_COLUMNAR)) { + System.exit(0); + } + } + + private StringEncodingStrategy getStringEncodingStrategy() + { + if (stringEncoding == BenchmarkStringEncodingStrategy.FRONT_CODED_DEFAULT_V1) { + return new StringEncodingStrategy.FrontCoded(null, FrontCodedIndexed.V1); + } else if (stringEncoding == BenchmarkStringEncodingStrategy.FRONT_CODED_16_V1) { + return new StringEncodingStrategy.FrontCoded(16, FrontCodedIndexed.V1); + } else { + return new StringEncodingStrategy.Utf8(); + } + } + + public static Pair createSqlSystem( + final Map segmentMap, + final Map realtimeSegmentsMap, + final Map lookupMap, + @Nullable final BenchmarkStorage storageType, + final Closer closer + ) + { + final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(closer); + final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate); + final PlannerConfig plannerConfig = new PlannerConfig(); + + for (final Map.Entry segmentEntry : segmentMap.entrySet()) { + addSegmentToWalker(walker, segmentEntry.getKey(), segmentEntry.getValue(), storageType); + } + + for (final Map.Entry segmentEntry : realtimeSegmentsMap.entrySet()) { + walker.add( + segmentEntry.getKey(), + new IncrementalIndexSegment(segmentEntry.getValue(), segmentEntry.getKey().getId()) + ); + } + + // Child injector that adds additional lookups.
+ final Injector injector = new StartupInjectorBuilder() + .withEmptyProperties() + .add( + new ExpressionModule(), + new SegmentWranglerModule(), + new LookylooModule(), + new SqlAggregationModule(), + new CalciteTestOperatorModule(), + binder -> { + for (Map.Entry entry : lookupMap.entrySet()) { + MapBinder.newMapBinder(binder, String.class, LookupExtractor.class) + .addBinding(entry.getKey()) + .toProvider(entry::getValue) + .in(LazySingleton.class); + } + }, + new HllSketchModule(), + new SketchModule(), + new DoublesSketchModule(), + binder -> { + + } + ) + .build(); + ObjectMapper injected = injector.getInstance(Key.get(ObjectMapper.class, Json.class)); + injected.registerModules(new HllSketchModule().getJacksonModules()); + + final DruidSchemaCatalog rootSchema = + QueryFrameworkUtils.createMockRootSchema( + injector, + conglomerate, + walker, + plannerConfig, + AuthTestUtils.TEST_AUTHORIZER_MAPPER + ); + + final SqlEngine engine = CalciteTests.createMockSqlEngine(walker, conglomerate); + + final PlannerFactory plannerFactory = new PlannerFactory( + rootSchema, + createOperatorTable(injector), + injector.getInstance(ExprMacroTable.class), + plannerConfig, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, + injector.getInstance(Key.get(ObjectMapper.class, Json.class)), + CalciteTests.DRUID_SCHEMA_NAME, + new CalciteRulesManager(ImmutableSet.of()), + new JoinableFactoryWrapper(QueryFrameworkUtils.createDefaultJoinableFactory(injector)), + CatalogResolver.NULL_RESOLVER, + new AuthConfig(), + new DruidHookDispatcher() + ); + + return Pair.of(plannerFactory, engine); + } + + private static void addSegmentToWalker( + final SpecificSegmentsQuerySegmentWalker walker, + final DataSegment descriptor, + final QueryableIndex index, + @Nullable final BenchmarkStorage storageType + ) + { + if (storageType == null || BenchmarkStorage.MMAP == storageType) { + walker.add(descriptor, new QueryableIndexSegment(index, descriptor.getId())); + } else if (BenchmarkStorage.FRAME_ROW == storageType) { + QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index); + walker.add( + descriptor, + new FrameSegment( + FrameTestUtil.cursorFactoryToFrame(cursorFactory, FrameType.ROW_BASED), + FrameReader.create(cursorFactory.getRowSignature()), + descriptor.getId() + ) + { + @Nullable + @Override + public T as(@Nonnull Class clazz) + { + // computed sql schema uses segment metadata, which relies on physical inspector, use the underlying index + if (clazz.equals(PhysicalSegmentInspector.class)) { + return (T) new QueryableIndexPhysicalSegmentInspector(index); + } + return super.as(clazz); + } + } + ); + } else if (BenchmarkStorage.FRAME_COLUMNAR == storageType) { + QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index); + walker.add( + descriptor, + new FrameSegment( + FrameTestUtil.cursorFactoryToFrame(cursorFactory, FrameType.COLUMNAR), + FrameReader.create(cursorFactory.getRowSignature()), + descriptor.getId() + ) + { + @Nullable + @Override + public T as(@Nonnull Class clazz) + { + // computed sql schema uses segment metadata, which relies on physical inspector, use the underlying index + if (clazz.equals(PhysicalSegmentInspector.class)) { + return (T) new QueryableIndexPhysicalSegmentInspector(index); + } + return super.as(clazz); + } + } + ); + } else { + throw new IAE("Invalid storageType[%s]", storageType); + } + } + + private static DruidOperatorTable createOperatorTable(final Injector injector) + { + try { + final Set operators = new HashSet<>(); + 
operators.add(injector.getInstance(QueryLookupOperatorConversion.class)); + operators.addAll( + ImmutableList.of( + new HllSketchEstimateOperatorConversion(), + new ThetaSketchEstimateOperatorConversion(), + new DoublesSketchQuantileOperatorConversion(), + new DoublesSketchQuantilesOperatorConversion() + ) + ); + final ApproxCountDistinctSqlAggregator countDistinctSqlAggregator = + new ApproxCountDistinctSqlAggregator(new HllSketchApproxCountDistinctSqlAggregator()); + final Set aggregators = new HashSet<>( + ImmutableList.of( + new DoublesSketchApproxQuantileSqlAggregator(), + new DoublesSketchObjectSqlAggregator(), + new HllSketchApproxCountDistinctSqlAggregator(), + new HllSketchApproxCountDistinctUtf8SqlAggregator(), + new ThetaSketchApproxCountDistinctSqlAggregator(), + new CountSqlAggregator(countDistinctSqlAggregator), + countDistinctSqlAggregator + ) + ); + return new DruidOperatorTable(aggregators, operators); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + @TearDown(Level.Trial) + public void tearDown() throws Exception + { + closer.close(); + } +} diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBasePlanBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBasePlanBenchmark.java new file mode 100644 index 00000000000..e3ef0c67399 --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBasePlanBenchmark.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.benchmark.query; + +import com.google.common.collect.ImmutableMap; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.sql.calcite.planner.DruidPlanner; +import org.apache.druid.sql.calcite.planner.PlannerResult; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.Map; +import java.util.concurrent.TimeUnit; + +public class SqlBasePlanBenchmark extends SqlBaseBenchmark +{ + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void planSql(Blackhole blackhole) + { + final Map context = ImmutableMap.of( + QueryContexts.VECTORIZE_KEY, vectorize, + QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize + ); + final String sql = getQuery(); + try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) { + final PlannerResult plannerResult = planner.plan(); + blackhole.consume(plannerResult); + } + } +} diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBaseQueryBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBaseQueryBenchmark.java new file mode 100644 index 00000000000..48a8b49b5bc --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBaseQueryBenchmark.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.benchmark.query; + +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.sql.calcite.planner.DruidPlanner; +import org.apache.druid.sql.calcite.planner.PlannerResult; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.Map; +import java.util.concurrent.TimeUnit; + +public class SqlBaseQueryBenchmark extends SqlBaseBenchmark +{ + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void querySql(Blackhole blackhole) + { + final Map context = getContext(); + final String sql = getQuery(); + try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) { + final PlannerResult plannerResult = planner.plan(); + final Sequence resultSequence = plannerResult.run().getResults(); + final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in); + blackhole.consume(lastRow); + } + } +} diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java index 0e38757f12b..09a54698de0 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java @@ -19,101 +19,15 @@ package org.apache.druid.benchmark.query; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.inject.Injector; -import com.google.inject.Key; -import com.google.inject.multibindings.MapBinder; -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.data.input.impl.DimensionSchema; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.frame.FrameType; -import org.apache.druid.frame.testutil.FrameTestUtil; -import org.apache.druid.guice.ExpressionModule; -import org.apache.druid.guice.LazySingleton; -import org.apache.druid.guice.SegmentWranglerModule; -import org.apache.druid.guice.StartupInjectorBuilder; -import org.apache.druid.guice.annotations.Json; -import org.apache.druid.java.util.common.IAE; -import org.apache.druid.java.util.common.Pair; -import org.apache.druid.java.util.common.granularity.Granularities; -import org.apache.druid.java.util.common.guava.Sequence; -import org.apache.druid.java.util.common.guava.Yielder; -import org.apache.druid.java.util.common.guava.Yielders; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.math.expr.ExprMacroTable; -import org.apache.druid.query.QueryContexts; -import org.apache.druid.query.QueryRunnerFactoryConglomerate; -import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctSqlAggregator; -import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctUtf8SqlAggregator; -import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchApproxQuantileSqlAggregator; -import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchObjectSqlAggregator; -import 
org.apache.druid.query.aggregation.datasketches.theta.sql.ThetaSketchApproxCountDistinctSqlAggregator; -import org.apache.druid.query.lookup.LookupExtractor; -import org.apache.druid.segment.AutoTypeColumnSchema; -import org.apache.druid.segment.IndexSpec; -import org.apache.druid.segment.QueryableIndex; -import org.apache.druid.segment.QueryableIndexCursorFactory; -import org.apache.druid.segment.QueryableIndexSegment; -import org.apache.druid.segment.column.StringEncodingStrategy; -import org.apache.druid.segment.data.FrontCodedIndexed; -import org.apache.druid.segment.generator.GeneratorBasicSchemas; -import org.apache.druid.segment.generator.GeneratorSchemaInfo; -import org.apache.druid.segment.generator.SegmentGenerator; -import org.apache.druid.segment.join.JoinableFactoryWrapper; -import org.apache.druid.segment.transform.TransformSpec; -import org.apache.druid.server.QueryStackTests; -import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker; -import org.apache.druid.server.security.AuthConfig; -import org.apache.druid.server.security.AuthTestUtils; -import org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator; -import org.apache.druid.sql.calcite.aggregation.SqlAggregationModule; -import org.apache.druid.sql.calcite.aggregation.SqlAggregator; -import org.apache.druid.sql.calcite.aggregation.builtin.CountSqlAggregator; -import org.apache.druid.sql.calcite.expression.SqlOperatorConversion; -import org.apache.druid.sql.calcite.expression.builtin.QueryLookupOperatorConversion; -import org.apache.druid.sql.calcite.planner.CalciteRulesManager; -import org.apache.druid.sql.calcite.planner.CatalogResolver; -import org.apache.druid.sql.calcite.planner.DruidOperatorTable; -import org.apache.druid.sql.calcite.planner.DruidPlanner; -import org.apache.druid.sql.calcite.planner.PlannerConfig; -import org.apache.druid.sql.calcite.planner.PlannerFactory; -import org.apache.druid.sql.calcite.planner.PlannerResult; -import org.apache.druid.sql.calcite.run.SqlEngine; -import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog; -import org.apache.druid.sql.calcite.util.CalciteTests; -import org.apache.druid.sql.calcite.util.LookylooModule; -import org.apache.druid.sql.calcite.util.QueryFrameworkUtils; -import org.apache.druid.sql.calcite.util.testoperator.CalciteTestOperatorModule; -import org.apache.druid.sql.hook.DruidHookDispatcher; -import org.apache.druid.timeline.DataSegment; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.annotations.Warmup; -import org.openjdk.jmh.infra.Blackhole; -import javax.annotation.Nullable; -import java.util.Collections; -import java.util.HashSet; import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; /** * Benchmark that tests various SQL queries. 
@@ -122,40 +36,30 @@ import java.util.stream.Collectors; @Fork(value = 1) @Warmup(iterations = 3) @Measurement(iterations = 5) -public class SqlBenchmark +public class SqlBenchmark extends SqlBaseQueryBenchmark { - static { - NullHandling.initializeForTests(); - } - - private static final Logger log = new Logger(SqlBenchmark.class); - - private static final String STORAGE_MMAP = "mmap"; - private static final String STORAGE_FRAME_ROW = "frame-row"; - private static final String STORAGE_FRAME_COLUMNAR = "frame-columnar"; - - private static final List QUERIES = ImmutableList.of( + static final List QUERIES = ImmutableList.of( // 0, 1, 2, 3: Timeseries, unfiltered - "SELECT COUNT(*) FROM foo", - "SELECT COUNT(DISTINCT hyper) FROM foo", - "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo", - "SELECT FLOOR(__time TO MINUTE), SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo GROUP BY 1", + "SELECT COUNT(*) FROM druid.basic", + "SELECT APPROX_COUNT_DISTINCT_BUILTIN(hyper) FROM druid.basic", + "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM druid.basic", + "SELECT FLOOR(__time TO MINUTE), SUM(sumLongSequential), SUM(sumFloatNormal) FROM druid.basic GROUP BY 1", // 4: Timeseries, low selectivity filter (90% of rows match) - "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo WHERE dimSequential NOT LIKE '%3'", + "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM druid.basic WHERE dimSequential NOT LIKE '%3'", // 5: Timeseries, high selectivity filter (0.1% of rows match) - "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo WHERE dimSequential = '311'", + "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM druid.basic WHERE dimSequential = '311'", // 6: Timeseries, mixing low selectivity index-capable filter (90% of rows match) + cursor filter - "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo\n" + "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM druid.basic\n" + "WHERE dimSequential NOT LIKE '%3' AND maxLongUniform > 10", // 7: Timeseries, low selectivity toplevel filter (90%), high selectivity filtered aggregator (0.1%) "SELECT\n" + " SUM(sumLongSequential) FILTER(WHERE dimSequential = '311'),\n" + " SUM(sumFloatNormal)\n" - + "FROM foo\n" + + "FROM druid.basic\n" + "WHERE dimSequential NOT LIKE '%3'", // 8: Timeseries, no toplevel filter, various filtered aggregators with clauses repeated. 
@@ -175,7 +79,7 @@ public class SqlBenchmark + " COUNT(*) FILTER(WHERE dimSequential LIKE '%3'),\n" + " COUNT(*) FILTER(WHERE dimSequential NOT LIKE '%3'),\n" + " COUNT(*)\n" - + "FROM foo", + + "FROM druid.basic", // 9: Timeseries, toplevel time filter, time-comparison filtered aggregators "SELECT\n" @@ -183,233 +87,233 @@ public class SqlBenchmark + " FILTER(WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' AND __time < TIMESTAMP '2000-01-01 12:00:00'),\n" + " SUM(sumLongSequential)\n" + " FILTER(WHERE __time >= TIMESTAMP '2000-01-01 12:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00')\n" - + "FROM foo\n" + + "FROM druid.basic\n" + "WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00'", // 10, 11: GroupBy two strings, unfiltered, unordered - "SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM foo GROUP BY 1, 2", - "SELECT dimSequential, dimZipf, SUM(sumLongSequential), COUNT(*) FROM foo GROUP BY 1, 2", + "SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM druid.basic GROUP BY 1, 2", + "SELECT dimSequential, dimZipf, SUM(sumLongSequential), COUNT(*) FROM druid.basic GROUP BY 1, 2", // 12, 13, 14: GroupBy one string, unfiltered, various aggregator configurations - "SELECT dimZipf FROM foo GROUP BY 1", - "SELECT dimZipf, COUNT(*) FROM foo GROUP BY 1 ORDER BY COUNT(*) DESC", - "SELECT dimZipf, SUM(sumLongSequential), COUNT(*) FROM foo GROUP BY 1 ORDER BY COUNT(*) DESC", + "SELECT dimZipf FROM druid.basic GROUP BY 1", + "SELECT dimZipf, COUNT(*) FROM druid.basic GROUP BY 1 ORDER BY COUNT(*) DESC", + "SELECT dimZipf, SUM(sumLongSequential), COUNT(*) FROM druid.basic GROUP BY 1 ORDER BY COUNT(*) DESC", // 15, 16: GroupBy long, unfiltered, unordered; with and without aggregators - "SELECT maxLongUniform FROM foo GROUP BY 1", - "SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM foo GROUP BY 1", + "SELECT maxLongUniform FROM druid.basic GROUP BY 1", + "SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM druid.basic GROUP BY 1", // 17, 18: GroupBy long, filter by long, unordered; with and without aggregators - "SELECT maxLongUniform FROM foo WHERE maxLongUniform > 10 GROUP BY 1", - "SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM foo WHERE maxLongUniform > 10 GROUP BY 1", + "SELECT maxLongUniform FROM druid.basic WHERE maxLongUniform > 10 GROUP BY 1", + "SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM druid.basic WHERE maxLongUniform > 10 GROUP BY 1", // 19: ultra mega union matrix "WITH matrix (dimZipf, dimSequential) AS (\n" + " (\n" + " SELECT '100', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '100'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '110', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '110'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '120', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '120'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '130', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '130'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION 
ALL\n" + " (\n" + " SELECT '140', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '140'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '150', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '150'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '160', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '160'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '170', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '170'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '180', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '180'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '190', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '190'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '200', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '200'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '210', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '210'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '220', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '220'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '230', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '230'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '240', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '240'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '250', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '250'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '260', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '260'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '270', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '270'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '280', dimSequential\n" - + " FROM (SELECT * FROM foo 
WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '280'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '290', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '290'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '300', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '300'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '310', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '310'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '320', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '320'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '330', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '330'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '340', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '340'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '350', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '350'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '360', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '360'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '370', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '370'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT '380', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE dimZipf = '380'\n" + " GROUP BY dimSequential\n" + " )\n" + "UNION ALL\n" + " (\n" + " SELECT 'other', dimSequential\n" - + " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n" + + " FROM (SELECT * FROM druid.basic WHERE dimUniform != 1)\n" + " WHERE\n" + " dimZipf NOT IN (\n" + " '100', '110', '120', '130', '140', '150', '160', '170', '180', '190',\n" @@ -423,65 +327,45 @@ public class SqlBenchmark // 20: GroupBy, doubles sketches "SELECT dimZipf, APPROX_QUANTILE_DS(sumFloatNormal, 0.5), DS_QUANTILES_SKETCH(maxLongUniform) " - + "FROM foo " + + "FROM druid.basic " + "GROUP BY 1", // 21, 22: stringy stuff - "SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM foo WHERE dimUniform NOT LIKE '%3' GROUP BY 1, 2", - "SELECT dimZipf, SUM(sumLongSequential) FROM foo WHERE dimSequential = '311' GROUP BY 1 ORDER BY 1", + "SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM druid.basic WHERE dimUniform NOT LIKE '%3' GROUP BY 1, 2", + "SELECT dimZipf, 
SUM(sumLongSequential) FROM druid.basic WHERE dimSequential = '311' GROUP BY 1 ORDER BY 1", // 23: full scan - "SELECT * FROM foo", - "SELECT * FROM foo WHERE dimSequential IN ('1', '2', '3', '4', '5', '10', '11', '20', '21', '23', '40', '50', '64', '70', '100')", - "SELECT * FROM foo WHERE dimSequential > '10' AND dimSequential < '8500'", - "SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM foo WHERE dimSequential IN ('1', '2', '3', '4', '5', '10', '11', '20', '21', '23', '40', '50', '64', '70', '100') GROUP BY 1, 2", - "SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM foo WHERE dimSequential > '10' AND dimSequential < '8500' GROUP BY 1, 2", + "SELECT * FROM druid.basic", + "SELECT * FROM druid.basic WHERE dimSequential IN ('1', '2', '3', '4', '5', '10', '11', '20', '21', '23', '40', '50', '64', '70', '100')", + "SELECT * FROM druid.basic WHERE dimSequential > '10' AND dimSequential < '8500'", + "SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM druid.basic WHERE dimSequential IN ('1', '2', '3', '4', '5', '10', '11', '20', '21', '23', '40', '50', '64', '70', '100') GROUP BY 1, 2", + "SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM druid.basic WHERE dimSequential > '10' AND dimSequential < '8500' GROUP BY 1, 2", // 28, 29, 30, 31: Approximate count distinct of strings - "SELECT APPROX_COUNT_DISTINCT_BUILTIN(dimZipf) FROM foo", - "SELECT APPROX_COUNT_DISTINCT_DS_HLL(dimZipf) FROM foo", - "SELECT APPROX_COUNT_DISTINCT_DS_HLL_UTF8(dimZipf) FROM foo", - "SELECT APPROX_COUNT_DISTINCT_DS_THETA(dimZipf) FROM foo", + "SELECT APPROX_COUNT_DISTINCT_BUILTIN(dimZipf) FROM druid.basic", + "SELECT APPROX_COUNT_DISTINCT_DS_HLL(dimZipf) FROM druid.basic", + "SELECT APPROX_COUNT_DISTINCT_DS_HLL_UTF8(dimZipf) FROM druid.basic", + "SELECT APPROX_COUNT_DISTINCT_DS_THETA(dimZipf) FROM druid.basic", // 32: LATEST aggregator long - "SELECT LATEST(long1) FROM foo", + "SELECT LATEST(long1) FROM druid.expressions", // 33: LATEST aggregator double - "SELECT LATEST(double4) FROM foo", + "SELECT LATEST(double4) FROM druid.expressions", // 34: LATEST aggregator double - "SELECT LATEST(float3) FROM foo", + "SELECT LATEST(float3) FROM druid.expressions", // 35: LATEST aggregator double - "SELECT LATEST(float3), LATEST(long1), LATEST(double4) FROM foo", + "SELECT LATEST(float3), LATEST(long1), LATEST(double4) FROM druid.expressions", // 36,37: filter numeric nulls - "SELECT SUM(long5) FROM foo WHERE long5 IS NOT NULL", - "SELECT string2, SUM(long5) FROM foo WHERE long5 IS NOT NULL GROUP BY 1", + "SELECT SUM(long5) FROM druid.expressions WHERE long5 IS NOT NULL", + "SELECT string2, SUM(long5) FROM druid.expressions WHERE long5 IS NOT NULL GROUP BY 1", // 38: EARLIEST aggregator long - "SELECT EARLIEST(long1) FROM foo", + "SELECT EARLIEST(long1) FROM druid.expressions", // 39: EARLIEST aggregator double - "SELECT EARLIEST(double4) FROM foo", + "SELECT EARLIEST(double4) FROM druid.expressions", // 40: EARLIEST aggregator float - "SELECT EARLIEST(float3) FROM foo", + "SELECT EARLIEST(float3) FROM druid.expressions", // 41: nested OR filter - "SELECT dimSequential, COUNT(*) from foo WHERE dimSequential = '1' AND (dimMultivalEnumerated IN ('Hello', 'World', 'Foo', 'Bar', 'Baz') OR sumLongSequential = 1) GROUP BY 1" + "SELECT dimSequential, COUNT(*) from druid.basic WHERE dimSequential = '1' AND (dimMultivalEnumerated IN ('Hello', 'World', 'Foo', 'Bar', 'Baz') OR sumLongSequential = 1) GROUP BY 1" ); - @Param({"5000000"}) - private int rowsPerSegment; - - // Can be "false", "true", 
or "force" - @Param({"false", "force"}) - private String vectorize; - - // Can be "none" or "front-coded-N" - @Param({ - "none", - "front-coded-4" - }) - private String stringEncoding; - - @Param({ - "explicit", - "auto" - }) - private String schema; - @Param({ "0", "1", @@ -528,254 +412,15 @@ public class SqlBenchmark }) private String query; - // Can be STORAGE_MMAP, STORAGE_FRAME_ROW, or STORAGE_FRAME_COLUMNAR - @Param({STORAGE_MMAP}) - private String storageType; - - private SqlEngine engine; - - @Nullable - private PlannerFactory plannerFactory; - private final Closer closer = Closer.create(); - - @Setup(Level.Trial) - public void setup() + @Override + public String getQuery() { - final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("basic"); - final DataSegment dataSegment = schemaInfo.makeSegmentDescriptor("foo"); - final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); - - log.info("Starting benchmark setup using cacheDir[%s], rows[%,d].", segmentGenerator.getCacheDir(), rowsPerSegment); - final QueryableIndex index; - if ("auto".equals(schema)) { - List columnSchemas = schemaInfo.getDimensionsSpec() - .getDimensions() - .stream() - .map(x -> new AutoTypeColumnSchema(x.getName(), null)) - .collect(Collectors.toList()); - index = segmentGenerator.generate( - dataSegment, - schemaInfo, - DimensionsSpec.builder().setDimensions(columnSchemas).build(), - TransformSpec.NONE, - IndexSpec.builder().withStringDictionaryEncoding(getStringEncodingStrategy()).build(), - Granularities.NONE, - rowsPerSegment - ); - } else { - index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment); - } - - final Pair sqlSystem = createSqlSystem( - ImmutableMap.of(dataSegment, index), - Collections.emptyMap(), - null, - closer - ); - - plannerFactory = sqlSystem.lhs; - engine = sqlSystem.rhs; - - final String sql = QUERIES.get(Integer.parseInt(query)); - final ObjectMapper jsonMapper = CalciteTests.getJsonMapper(); - try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) { - final PlannerResult plannerResult = planner.plan(); - final Sequence resultSequence = plannerResult.run().getResults(); - final Object[] planResult = resultSequence.toList().get(0); - log.info("Native query plan:\n" + - jsonMapper.writerWithDefaultPrettyPrinter() - .writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class)) - ); - } - catch (JsonProcessingException ignored) { - - } - - try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) { - final PlannerResult plannerResult = planner.plan(); - final Sequence resultSequence = plannerResult.run().getResults(); - final Yielder yielder = Yielders.each(resultSequence); - int rowCounter = 0; - while (!yielder.isDone()) { - rowCounter++; - yielder.next(yielder.get()); - } - log.info("Total result row count:" + rowCounter); - } - catch (Throwable ignored) { - - } + return QUERIES.get(Integer.parseInt(query)); } - private StringEncodingStrategy getStringEncodingStrategy() + @Override + public List getDatasources() { - if (stringEncoding.startsWith("front-coded")) { - String[] split = stringEncoding.split("-"); - int bucketSize = Integer.parseInt(split[2]); - return new StringEncodingStrategy.FrontCoded(bucketSize, FrontCodedIndexed.V1); - } else { - return new StringEncodingStrategy.Utf8(); - } - } - - public static Pair createSqlSystem( - 
final Map segmentMap, - final Map lookupMap, - @Nullable final String storageType, - final Closer closer - ) - { - final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(closer); - final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate); - final PlannerConfig plannerConfig = new PlannerConfig(); - - for (final Map.Entry segmentEntry : segmentMap.entrySet()) { - addSegmentToWalker(walker, segmentEntry.getKey(), segmentEntry.getValue(), storageType); - } - - // Child injector that adds additional lookups. - final Injector injector = new StartupInjectorBuilder() - .withEmptyProperties() - .add( - new ExpressionModule(), - new SegmentWranglerModule(), - new LookylooModule(), - new SqlAggregationModule(), - new CalciteTestOperatorModule(), - binder -> { - for (Map.Entry entry : lookupMap.entrySet()) { - MapBinder.newMapBinder(binder, String.class, LookupExtractor.class) - .addBinding(entry.getKey()) - .toProvider(entry::getValue) - .in(LazySingleton.class); - } - } - ) - .build(); - - final DruidSchemaCatalog rootSchema = - QueryFrameworkUtils.createMockRootSchema( - injector, - conglomerate, - walker, - plannerConfig, - AuthTestUtils.TEST_AUTHORIZER_MAPPER - ); - - final SqlEngine engine = CalciteTests.createMockSqlEngine(walker, conglomerate); - - final PlannerFactory plannerFactory = new PlannerFactory( - rootSchema, - createOperatorTable(injector), - injector.getInstance(ExprMacroTable.class), - plannerConfig, - AuthTestUtils.TEST_AUTHORIZER_MAPPER, - injector.getInstance(Key.get(ObjectMapper.class, Json.class)), - CalciteTests.DRUID_SCHEMA_NAME, - new CalciteRulesManager(ImmutableSet.of()), - new JoinableFactoryWrapper(QueryFrameworkUtils.createDefaultJoinableFactory(injector)), - CatalogResolver.NULL_RESOLVER, - new AuthConfig(), - new DruidHookDispatcher() - ); - - return Pair.of(plannerFactory, engine); - } - - private static void addSegmentToWalker( - final SpecificSegmentsQuerySegmentWalker walker, - final DataSegment descriptor, - final QueryableIndex index, - @Nullable final String storageType - ) - { - if (storageType == null || STORAGE_MMAP.equals(storageType)) { - walker.add(descriptor, new QueryableIndexSegment(index, descriptor.getId())); - } else if (STORAGE_FRAME_ROW.equals(storageType)) { - walker.add( - descriptor, - FrameTestUtil.cursorFactoryToFrameSegment( - new QueryableIndexCursorFactory(index), - FrameType.ROW_BASED, - descriptor.getId() - ) - ); - } else if (STORAGE_FRAME_COLUMNAR.equals(storageType)) { - walker.add( - descriptor, - FrameTestUtil.cursorFactoryToFrameSegment( - new QueryableIndexCursorFactory(index), - FrameType.COLUMNAR, - descriptor.getId() - ) - ); - } else { - throw new IAE("Invalid storageType[%s]", storageType); - } - } - - private static DruidOperatorTable createOperatorTable(final Injector injector) - { - try { - final Set extractionOperators = new HashSet<>(); - extractionOperators.add(injector.getInstance(QueryLookupOperatorConversion.class)); - final ApproxCountDistinctSqlAggregator countDistinctSqlAggregator = - new ApproxCountDistinctSqlAggregator(new HllSketchApproxCountDistinctSqlAggregator()); - final Set aggregators = new HashSet<>( - ImmutableList.of( - new DoublesSketchApproxQuantileSqlAggregator(), - new DoublesSketchObjectSqlAggregator(), - new HllSketchApproxCountDistinctSqlAggregator(), - new HllSketchApproxCountDistinctUtf8SqlAggregator(), - new ThetaSketchApproxCountDistinctSqlAggregator(), - new 
CountSqlAggregator(countDistinctSqlAggregator), - countDistinctSqlAggregator - ) - ); - return new DruidOperatorTable(aggregators, extractionOperators); - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - - @TearDown(Level.Trial) - public void tearDown() throws Exception - { - closer.close(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void querySql(Blackhole blackhole) - { - final Map context = ImmutableMap.of( - QueryContexts.VECTORIZE_KEY, vectorize, - QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize - ); - final String sql = QUERIES.get(Integer.parseInt(query)); - try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) { - final PlannerResult plannerResult = planner.plan(); - final Sequence resultSequence = plannerResult.run().getResults(); - final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in); - blackhole.consume(lastRow); - } - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void planSql(Blackhole blackhole) - { - final Map context = ImmutableMap.of( - QueryContexts.VECTORIZE_KEY, vectorize, - QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize - ); - final String sql = QUERIES.get(Integer.parseInt(query)); - try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) { - final PlannerResult plannerResult = planner.plan(); - blackhole.consume(plannerResult); - } + return ImmutableList.of(SqlBenchmarkDatasets.BASIC); } } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmarkDatasets.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmarkDatasets.java new file mode 100644 index 00000000000..7b6b48802f3 --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmarkDatasets.java @@ -0,0 +1,423 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.benchmark.query; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import org.apache.druid.data.input.impl.AggregateProjectionSpec; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.LongDimensionSchema; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory; +import org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory; +import org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory; +import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.segment.AutoTypeColumnSchema; +import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.generator.GeneratorBasicSchemas; +import org.apache.druid.segment.generator.GeneratorSchemaInfo; +import org.apache.druid.segment.transform.ExpressionTransform; +import org.apache.druid.segment.transform.TransformSpec; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.LinearShardSpec; +import org.joda.time.Interval; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class SqlBenchmarkDatasets +{ + private static Map<String, BenchmarkSchema> DATASET_SCHEMAS = new HashMap<>(); + + public static String BASIC = "basic"; + public static String EXPRESSIONS = "expressions"; + public static String NESTED = "nested"; + public static String DATASKETCHES = "datasketches"; + public static String PROJECTIONS = "projections"; + public static String GROUPER = "grouper"; + + // initialize all benchmark dataset schemas to feed the data generators when running benchmarks; add any additional + // datasets to this initializer as needed and they will be available to any benchmark at the table name added here + static { + // the classic 'basic' schema, string dimension oriented with a few metrics + final GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get(GeneratorBasicSchemas.BASIC_SCHEMA); + DATASET_SCHEMAS.put( + BASIC, + new BenchmarkSchema( + Collections.singletonList(makeSegment(BASIC, basicSchema.getDataInterval())), + basicSchema, + TransformSpec.NONE, + makeDimensionsSpec(basicSchema), + basicSchema.getAggsArray(), + Collections.emptyList(), + Granularities.NONE + ) + ); + + // expression testbench schema, lots of different column types + final GeneratorSchemaInfo expressionsSchema = GeneratorBasicSchemas.SCHEMA_MAP.get( + GeneratorBasicSchemas.EXPRESSION_TESTBENCH_SCHEMA + ); + DATASET_SCHEMAS.put( + EXPRESSIONS, + new BenchmarkSchema( + Collections.singletonList(makeSegment(EXPRESSIONS, expressionsSchema.getDataInterval())), + expressionsSchema, + TransformSpec.NONE, + makeDimensionsSpec(expressionsSchema), + expressionsSchema.getAggsArray(), + Collections.emptyList(), + Granularities.NONE + ) + ); + + // expressions schema but with transform to create nested column + DATASET_SCHEMAS.put( + NESTED, + new BenchmarkSchema( +
Collections.singletonList(makeSegment(NESTED, expressionsSchema.getDataInterval())), + expressionsSchema, + new TransformSpec( + null, + ImmutableList.of( + new ExpressionTransform( + "nested", + "json_object('long1', long1, 'nesteder', json_object('string1', string1, 'long2', long2, 'double3',double3, 'string5', string5))", + TestExprMacroTable.INSTANCE + ) + ) + ), + DimensionsSpec.builder().setDimensions( + ImmutableList.copyOf( + Iterables.concat( + expressionsSchema.getDimensionsSpecExcludeAggs().getDimensions(), + Collections.singletonList(new AutoTypeColumnSchema("nested", null)) + ) + ) + ).build(), + expressionsSchema.getAggsArray(), + Collections.emptyList(), + Granularities.NONE + ) + ); + + // expressions schema but with some datasketch aggs defined + GeneratorSchemaInfo datasketchesSchema = new GeneratorSchemaInfo( + expressionsSchema.getColumnSchemas(), + ImmutableList.of( + new HllSketchBuildAggregatorFactory("hll_string5", "string5", null, null, null, false, true), + new SketchMergeAggregatorFactory("theta_string5", "string5", null, null, null, null), + new DoublesSketchAggregatorFactory("quantiles_float4", "float4", null, null, null), + new DoublesSketchAggregatorFactory("quantiles_long3", "long3", null, null, null) + ), + expressionsSchema.getDataInterval(), + true + ); + DATASET_SCHEMAS.put( + DATASKETCHES, + new BenchmarkSchema( + Collections.singletonList(makeSegment(DATASKETCHES, datasketchesSchema.getDataInterval())), + datasketchesSchema, + TransformSpec.NONE, + makeDimensionsSpec(datasketchesSchema), + datasketchesSchema.getAggsArray(), + Collections.emptyList(), + Granularities.NONE + ) + ); + + // expressions schema with projections + DATASET_SCHEMAS.put( + PROJECTIONS, + new BenchmarkSchema( + Collections.singletonList(makeSegment(PROJECTIONS, expressionsSchema.getDataInterval())), + expressionsSchema, + TransformSpec.NONE, + makeDimensionsSpec(expressionsSchema), + expressionsSchema.getAggsArray(), + Arrays.asList( + new AggregateProjectionSpec( + "string2_hourly_sums_hll", + VirtualColumns.create( + Granularities.toVirtualColumn(Granularities.HOUR, "__gran") + ), + Arrays.asList( + new StringDimensionSchema("string2"), + new LongDimensionSchema("__gran") + ), + new AggregatorFactory[]{ + new LongSumAggregatorFactory("long4_sum", "long4"), + new DoubleSumAggregatorFactory("double2_sum", "double2"), + new HllSketchBuildAggregatorFactory("hll_string5", "string5", null, null, null, false, true) + } + ), + new AggregateProjectionSpec( + "string2_long2_sums", + VirtualColumns.EMPTY, + Arrays.asList( + new StringDimensionSchema("string2"), + new LongDimensionSchema("long2") + ), + new AggregatorFactory[]{ + new LongSumAggregatorFactory("long4_sum", "long4"), + new DoubleSumAggregatorFactory("double2_sum", "double2"), + new HllSketchBuildAggregatorFactory("hll_string5", "string5", null, null, null, false, true) + } + ) + ), + Granularities.NONE + ) + ); + + // group-by testing, 2 segments + final GeneratorSchemaInfo groupingSchema = GeneratorBasicSchemas.SCHEMA_MAP.get( + GeneratorBasicSchemas.GROUPBY_TESTBENCH_SCHEMA + ); + DATASET_SCHEMAS.put( + GROUPER, + new BenchmarkSchema( + Arrays.asList( + makeSegment(GROUPER, groupingSchema.getDataInterval(), 0), + makeSegment(GROUPER, groupingSchema.getDataInterval(), 1) + ), + groupingSchema, + new TransformSpec( + null, + ImmutableList.of( + // string array dims + new ExpressionTransform( + "stringArray-Sequential-100_000", + "array(\"string-Sequential-100_000\")", + TestExprMacroTable.INSTANCE + ), + new 
ExpressionTransform( + "stringArray-Sequential-3_000_000", + "array(\"string-Sequential-10_000_000\")", + TestExprMacroTable.INSTANCE + ), + /* + new ExpressionTransform( + "stringArray-Sequential-1_000_000_000", + "array(\"string-Sequential-1_000_000_000\")", + TestExprMacroTable.INSTANCE + ),*/ + new ExpressionTransform( + "stringArray-ZipF-1_000_000", + "array(\"string-ZipF-1_000_000\")", + TestExprMacroTable.INSTANCE + ), + new ExpressionTransform( + "stringArray-Uniform-1_000_000", + "array(\"string-Uniform-1_000_000\")", + TestExprMacroTable.INSTANCE + ), + + // long array dims + new ExpressionTransform( + "longArray-Sequential-100_000", + "array(\"long-Sequential-100_000\")", + TestExprMacroTable.INSTANCE + ), + new ExpressionTransform( + "longArray-Sequential-3_000_000", + "array(\"long-Sequential-10_000_000\")", + TestExprMacroTable.INSTANCE + ), + /* + new ExpressionTransform( + "longArray-Sequential-1_000_000_000", + "array(\"long-Sequential-1_000_000_000\")", + TestExprMacroTable.INSTANCE + ),*/ + new ExpressionTransform( + "longArray-ZipF-1_000_000", + "array(\"long-ZipF-1_000_000\")", + TestExprMacroTable.INSTANCE + ), + new ExpressionTransform( + "longArray-Uniform-1_000_000", + "array(\"long-Uniform-1_000_000\")", + TestExprMacroTable.INSTANCE + ), + + // nested complex json dim + new ExpressionTransform( + "nested-Sequential-100_000", + "json_object('long1', \"long-Sequential-100_000\", 'nesteder', json_object('long1', \"long-Sequential-100_000\"))", + TestExprMacroTable.INSTANCE + ), + new ExpressionTransform( + "nested-Sequential-3_000_000", + "json_object('long1', \"long-Sequential-10_000_000\", 'nesteder', json_object('long1', \"long-Sequential-10_000_000\"))", + TestExprMacroTable.INSTANCE + ), + /* + new ExpressionTransform( + "nested-Sequential-1_000_000_000", + "json_object('long1', \"long-Sequential-1_000_000_000\", 'nesteder', json_object('long1', \"long-Sequential-1_000_000_000\"))", + TestExprMacroTable.INSTANCE + ),*/ + new ExpressionTransform( + "nested-ZipF-1_000_000", + "json_object('long1', \"long-ZipF-1_000_000\", 'nesteder', json_object('long1', \"long-ZipF-1_000_000\"))", + TestExprMacroTable.INSTANCE + ), + new ExpressionTransform( + "nested-Uniform-1_000_000", + "json_object('long1', \"long-Uniform-1_000_000\", 'nesteder', json_object('long1', \"long-Uniform-1_000_000\"))", + TestExprMacroTable.INSTANCE + ) + ) + ), + makeDimensionsSpec(groupingSchema), + groupingSchema.getAggsArray(), + Collections.emptyList(), + Granularities.NONE + ) + ); + } + + public static BenchmarkSchema getSchema(String dataset) + { + return DATASET_SCHEMAS.get(dataset); + } + + private static DataSegment makeSegment(String datasource, Interval interval) + { + return makeSegment(datasource, interval, 0); + } + + private static DataSegment makeSegment(String datasource, Interval interval, int partitionNumber) + { + return DataSegment.builder() + .dataSource(datasource) + .interval(interval) + .version("1") + .shardSpec(new LinearShardSpec(partitionNumber)) + .size(0) + .build(); + } + + private static DimensionsSpec makeDimensionsSpec(GeneratorSchemaInfo schemaInfo) + { + return DimensionsSpec.builder().setDimensions(schemaInfo.getDimensionsSpecExcludeAggs().getDimensions()).build(); + } + + public static class BenchmarkSchema + { + private final List dataSegments; + private final GeneratorSchemaInfo generatorSchemaInfo; + private final TransformSpec transformSpec; + private final DimensionsSpec dimensionsSpec; + private final AggregatorFactory[] aggregators; + private final 
Granularity queryGranularity; + private final List<AggregateProjectionSpec> projections; + + public BenchmarkSchema( + List<DataSegment> dataSegments, + GeneratorSchemaInfo generatorSchemaInfo, + TransformSpec transformSpec, + DimensionsSpec dimensionSpec, + AggregatorFactory[] aggregators, + List<AggregateProjectionSpec> projections, + Granularity queryGranularity + ) + { + this.dataSegments = dataSegments; + this.generatorSchemaInfo = generatorSchemaInfo; + this.transformSpec = transformSpec; + this.dimensionsSpec = dimensionSpec; + this.aggregators = aggregators; + this.queryGranularity = queryGranularity; + this.projections = projections; + } + + public List<DataSegment> getDataSegments() + { + return dataSegments; + } + + public GeneratorSchemaInfo getGeneratorSchemaInfo() + { + return generatorSchemaInfo; + } + + public TransformSpec getTransformSpec() + { + return transformSpec; + } + + public DimensionsSpec getDimensionsSpec() + { + return dimensionsSpec; + } + + public AggregatorFactory[] getAggregators() + { + return aggregators; + } + + public Granularity getQueryGranularity() + { + return queryGranularity; + } + + public List<AggregateProjectionSpec> getProjections() + { + return projections; + } + + public BenchmarkSchema asAutoDimensions() + { + return new SqlBenchmarkDatasets.BenchmarkSchema( + dataSegments, + generatorSchemaInfo, + transformSpec, + dimensionsSpec.withDimensions( + dimensionsSpec.getDimensions() + .stream() + .map(dim -> new AutoTypeColumnSchema(dim.getName(), null)) + .collect(Collectors.toList()) + ), + aggregators, + projections.stream() + .map(projection -> new AggregateProjectionSpec( + projection.getName(), + projection.getVirtualColumns(), + projection.getGroupingColumns() + .stream() + .map(dim -> new AutoTypeColumnSchema(dim.getName(), null)) + .collect(Collectors.toList()), + projection.getAggregators() + )).collect(Collectors.toList()), + queryGranularity + ); + } + } +} diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlComplexMetricsColumnsBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlComplexMetricsColumnsBenchmark.java new file mode 100644 index 00000000000..3c001c9d039 --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlComplexMetricsColumnsBenchmark.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.benchmark.query; + +import com.google.common.collect.ImmutableList; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.util.List; + +@State(Scope.Benchmark) +@Fork(value = 1) +@Warmup(iterations = 3) +@Measurement(iterations = 5) +public class SqlComplexMetricsColumnsBenchmark extends SqlBaseQueryBenchmark +{ + private static final List QUERIES = ImmutableList.of( + "SELECT APPROX_COUNT_DISTINCT_DS_HLL(hll_string5) FROM druid.datasketches", + "SELECT APPROX_COUNT_DISTINCT_DS_THETA(theta_string5) FROM druid.datasketches", + "SELECT DS_GET_QUANTILE(DS_QUANTILES_SKETCH(quantiles_float4), 0.5) FROM druid.datasketches", + "SELECT DS_GET_QUANTILE(DS_QUANTILES_SKETCH(quantiles_long3), 0.9) FROM druid.datasketches", + "SELECT string2, APPROX_COUNT_DISTINCT_DS_HLL(hll_string5) FROM druid.datasketches GROUP BY 1 ORDER BY 2 DESC", + "SELECT string2, APPROX_COUNT_DISTINCT_DS_THETA(theta_string5, 4096) FROM druid.datasketches GROUP BY 1 ORDER BY 2 DESC", + "SELECT string2, DS_GET_QUANTILE(DS_QUANTILES_SKETCH(quantiles_float4), 0.5) FROM druid.datasketches GROUP BY 1 ORDER BY 2 DESC", + "SELECT string2, DS_GET_QUANTILE(DS_QUANTILES_SKETCH(quantiles_long3), 0.9) FROM druid.datasketches GROUP BY 1 ORDER BY 2 DESC" + ); + + @Param({ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7" + }) + private String query; + + @Override + public String getQuery() + { + return QUERIES.get(Integer.parseInt(query)); + } + + @Override + public List getDatasources() + { + return ImmutableList.of(SqlBenchmarkDatasets.DATASKETCHES); + } +} diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index cc6dd636df0..46036fcc64f 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -19,69 +19,19 @@ package org.apache.druid.benchmark.query; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.data.input.impl.DimensionSchema; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.java.util.common.granularity.Granularities; -import org.apache.druid.java.util.common.guava.Sequence; -import org.apache.druid.java.util.common.guava.Yielder; -import org.apache.druid.java.util.common.guava.Yielders; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.math.expr.ExpressionProcessing; -import org.apache.druid.query.DruidProcessingConfig; import org.apache.druid.query.QueryContexts; -import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.groupby.GroupByQueryConfig; -import org.apache.druid.segment.AutoTypeColumnSchema; -import org.apache.druid.segment.IndexSpec; -import org.apache.druid.segment.QueryableIndex; -import org.apache.druid.segment.generator.GeneratorBasicSchemas; -import 
org.apache.druid.segment.generator.GeneratorSchemaInfo; -import org.apache.druid.segment.generator.SegmentGenerator; -import org.apache.druid.segment.transform.TransformSpec; -import org.apache.druid.server.QueryStackTests; -import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker; -import org.apache.druid.server.security.AuthConfig; -import org.apache.druid.server.security.AuthTestUtils; -import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest; -import org.apache.druid.sql.calcite.planner.CalciteRulesManager; -import org.apache.druid.sql.calcite.planner.CatalogResolver; -import org.apache.druid.sql.calcite.planner.DruidPlanner; -import org.apache.druid.sql.calcite.planner.PlannerConfig; -import org.apache.druid.sql.calcite.planner.PlannerFactory; -import org.apache.druid.sql.calcite.planner.PlannerResult; -import org.apache.druid.sql.calcite.run.SqlEngine; -import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog; -import org.apache.druid.sql.calcite.util.CalciteTests; -import org.apache.druid.sql.hook.DruidHookDispatcher; -import org.apache.druid.timeline.DataSegment; -import org.apache.druid.timeline.partition.LinearShardSpec; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.annotations.Warmup; -import org.openjdk.jmh.infra.Blackhole; -import javax.annotation.Nullable; import java.util.List; import java.util.Map; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; /** * Benchmark that tests various SQL queries. 
@@ -90,157 +40,107 @@ import java.util.stream.Collectors; @Fork(value = 1) @Warmup(iterations = 3) @Measurement(iterations = 5) -public class SqlExpressionBenchmark +public class SqlExpressionBenchmark extends SqlBaseQueryBenchmark { - private static final Logger log = new Logger(SqlExpressionBenchmark.class); - - static { - NullHandling.initializeForTests(); - ExpressionProcessing.initializeForTests(); - } - - private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig() - { - @Override - public int intermediateComputeSizeBytes() - { - return 512 * 1024 * 1024; - } - - @Override - public int getNumMergeBuffers() - { - return 3; - } - - @Override - public int getNumThreads() - { - return 1; - } - - @Override - public String getFormatString() - { - return "benchmarks-processing-%s"; - } - }; - - private static final List QUERIES = ImmutableList.of( // =========================== // non-expression reference queries // =========================== // 0: non-expression timeseries reference, 1 columns - "SELECT SUM(long1) FROM foo", + "SELECT SUM(long1) FROM expressions", // 1: non-expression timeseries reference, 2 columns - "SELECT SUM(long1), SUM(long2) FROM foo", + "SELECT SUM(long1), SUM(long2) FROM expressions", // 2: non-expression timeseries reference, 3 columns - "SELECT SUM(long1), SUM(long4), SUM(double1) FROM foo", + "SELECT SUM(long1), SUM(long4), SUM(double1) FROM expressions", // 3: non-expression timeseries reference, 4 columns - "SELECT SUM(long1), SUM(long4), SUM(double1), SUM(float3) FROM foo", + "SELECT SUM(long1), SUM(long4), SUM(double1), SUM(float3) FROM expressions", // 4: non-expression timeseries reference, 5 columns - "SELECT SUM(long1), SUM(long4), SUM(double1), SUM(float3), SUM(long5) FROM foo", + "SELECT SUM(long1), SUM(long4), SUM(double1), SUM(float3), SUM(long5) FROM expressions", // 5: group by non-expr with 1 agg - "SELECT string2, SUM(long1) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT string2, SUM(long1) FROM expressions GROUP BY 1 ORDER BY 2", // 6: group by non-expr with 2 agg - "SELECT string2, SUM(long1), SUM(double3) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT string2, SUM(long1), SUM(double3) FROM expressions GROUP BY 1 ORDER BY 2", // =========================== // expressions // =========================== // 7: math op - 2 longs - "SELECT SUM(long1 * long2) FROM foo", + "SELECT SUM(long1 * long2) FROM expressions", // 8: mixed math - 2 longs, 1 double - "SELECT SUM((long1 * long2) / double1) FROM foo", + "SELECT SUM((long1 * long2) / double1) FROM expressions", // 9: mixed math - 2 longs, 1 double, 1 float - "SELECT SUM(float3 + ((long1 * long4)/double1)) FROM foo", + "SELECT SUM(float3 + ((long1 * long4)/double1)) FROM expressions", // 10: mixed math - 3 longs, 1 double, 1 float - "SELECT SUM(long5 - (float3 + ((long1 * long4)/double1))) FROM foo", + "SELECT SUM(long5 - (float3 + ((long1 * long4)/double1))) FROM expressions", // 11: all same math op - 3 longs, 1 double, 1 float - "SELECT SUM(long5 * float3 * long1 * long4 * double1) FROM foo", + "SELECT SUM(long5 * float3 * long1 * long4 * double1) FROM expressions", // 12: cos - "SELECT cos(double2) FROM foo", + "SELECT cos(double2) FROM expressions", // 13: unary negate - "SELECT SUM(-long4) FROM foo", + "SELECT SUM(-long4) FROM expressions", // 14: string long - "SELECT SUM(PARSE_LONG(string1)) FROM foo", + "SELECT SUM(PARSE_LONG(string1)) FROM expressions", // 15: string longer - "SELECT SUM(PARSE_LONG(string3)) FROM foo", + "SELECT SUM(PARSE_LONG(string3)) FROM 
expressions", // 16: time floor, non-expr col + reg agg - "SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(double4) FROM foo GROUP BY 1,2 ORDER BY 3", + "SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(double4) FROM expressions GROUP BY 1,2 ORDER BY 3", // 17: time floor, non-expr col + expr agg - "SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3", + "SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(long1 * double4) FROM expressions GROUP BY 1,2 ORDER BY 3", // 18: time floor + non-expr agg (timeseries) (non-expression reference) - "SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1) FROM foo GROUP BY 1 ORDER BY 1", + "SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1) FROM expressions GROUP BY 1 ORDER BY 1", // 19: time floor + expr agg (timeseries) - "SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1", + "SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM expressions GROUP BY 1 ORDER BY 1", // 20: time floor + non-expr agg (group by) - "SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1) FROM expressions GROUP BY 1 ORDER BY 2", // 21: time floor + expr agg (group by) - "SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM expressions GROUP BY 1 ORDER BY 2", // 22: time floor offset by 1 day + non-expr agg (group by) - "SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1) FROM foo GROUP BY 1 ORDER BY 1", + "SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1) FROM expressions GROUP BY 1 ORDER BY 1", // 23: time floor offset by 1 day + expr agg (group by) - "SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1", + "SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1 * long4) FROM expressions GROUP BY 1 ORDER BY 1", // 24: group by long expr with non-expr agg - "SELECT (long1 * long2), SUM(double1) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT (long1 * long2), SUM(double1) FROM expressions GROUP BY 1 ORDER BY 2", // 25: group by non-expr with expr agg - "SELECT string2, SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT string2, SUM(long1 * long4) FROM expressions GROUP BY 1 ORDER BY 2", // 26: group by string expr with non-expr agg - "SELECT CONCAT(string2, '-', long2), SUM(double1) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT CONCAT(string2, '-', long2), SUM(double1) FROM expressions GROUP BY 1 ORDER BY 2", // 27: group by string expr with expr agg - "SELECT CONCAT(string2, '-', long2), SUM(long1 * double4) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT CONCAT(string2, '-', long2), SUM(long1 * double4) FROM expressions GROUP BY 1 ORDER BY 2", // 28: group by single input string low cardinality expr with expr agg - "SELECT CONCAT(string2, '-', 'foo'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT CONCAT(string2, '-', 'expressions'), SUM(long1 * long4) FROM expressions GROUP BY 1 ORDER BY 2", // 29: group by single input string high cardinality expr with expr agg - "SELECT CONCAT(string3, '-', 'foo'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT CONCAT(string3, '-', 'expressions'), SUM(long1 * long4) FROM expressions GROUP BY 1 ORDER BY 2", // 30: logical and operator - "SELECT CAST(long1 as BOOLEAN) AND CAST (long2 as BOOLEAN), COUNT(*) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT CAST(long1 as BOOLEAN) AND 
CAST (long2 as BOOLEAN), COUNT(*) FROM expressions GROUP BY 1 ORDER BY 2", // 31: isnull, notnull - "SELECT long5 IS NULL, long3 IS NOT NULL, count(*) FROM foo GROUP BY 1,2 ORDER BY 3", + "SELECT long5 IS NULL, long3 IS NOT NULL, count(*) FROM expressions GROUP BY 1,2 ORDER BY 3", // 32: time shift, non-expr col + reg agg, regular - "SELECT TIME_SHIFT(__time, 'PT1H', 3), string2, SUM(double4) FROM foo GROUP BY 1,2 ORDER BY 3", + "SELECT TIME_SHIFT(__time, 'PT1H', 3), string2, SUM(double4) FROM expressions GROUP BY 1,2 ORDER BY 3", // 33: time shift, non-expr col + expr agg, sequential low cardinality - "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long1), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3", + "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long1), 'PT1H', 1), string2, SUM(long1 * double4) FROM expressions GROUP BY 1,2 ORDER BY 3", // 34: time shift + non-expr agg (timeseries) (non-expression reference), zipf distribution low cardinality - "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long2), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3", + "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long2), 'PT1H', 1), string2, SUM(long1 * double4) FROM expressions GROUP BY 1,2 ORDER BY 3", // 35: time shift + expr agg (timeseries), zipf distribution high cardinality - "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long3), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3", + "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long3), 'PT1H', 1), string2, SUM(long1 * double4) FROM expressions GROUP BY 1,2 ORDER BY 3", // 36: time shift + non-expr agg (group by), uniform distribution low cardinality - "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long4), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3", + "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long4), 'PT1H', 1), string2, SUM(long1 * double4) FROM expressions GROUP BY 1,2 ORDER BY 3", // 37: time shift + expr agg (group by), uniform distribution high cardinality - "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long5), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3", + "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long5), 'PT1H', 1), string2, SUM(long1 * double4) FROM expressions GROUP BY 1,2 ORDER BY 3", // 38,39: array element filtering - "SELECT string1, long1 FROM foo WHERE ARRAY_CONTAINS(\"multi-string3\", 100) GROUP BY 1,2", - "SELECT string1, long1 FROM foo WHERE ARRAY_OVERLAP(\"multi-string3\", ARRAY[100, 200]) GROUP BY 1,2", + "SELECT string1, long1 FROM expressions WHERE ARRAY_CONTAINS(\"multi-string3\", 100) GROUP BY 1,2", + "SELECT string1, long1 FROM expressions WHERE ARRAY_OVERLAP(\"multi-string3\", ARRAY[100, 200]) GROUP BY 1,2", // 40: regex filtering - "SELECT string4, COUNT(*) FROM foo WHERE REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL GROUP BY 1", + "SELECT string4, COUNT(*) FROM expressions WHERE REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL GROUP BY 1", // 41: complicated filtering - "SELECT string2, SUM(long1) FROM foo WHERE string1 = '1000' AND string5 LIKE '%1%' AND (string3 in ('1', '10', '20', '22', '32') AND long2 IN (1, 19, 21, 23, 25, 26, 46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL)) GROUP BY 1 ORDER BY 2", + "SELECT string2, SUM(long1) FROM expressions WHERE string1 = '1000' AND string5 LIKE '%1%' AND (string3 in ('1', '10', '20', '22', 
'32') AND long2 IN (1, 19, 21, 23, 25, 26, 46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL)) GROUP BY 1 ORDER BY 2", // 42: array_contains expr - "SELECT ARRAY_CONTAINS(\"multi-string3\", 100) FROM foo", - "SELECT ARRAY_CONTAINS(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo", - "SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 100]) FROM foo", - "SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo", + "SELECT ARRAY_CONTAINS(\"multi-string3\", 100) FROM expressions", + "SELECT ARRAY_CONTAINS(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM expressions", + "SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 100]) FROM expressions", + "SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM expressions", // 46: filters with random orders - "SELECT string2, SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND string1 = '1000' GROUP BY 1 ORDER BY 2", - "SELECT string2, SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND (string3 in ('1', '10', '20', '22', '32') AND long2 IN (1, 19, 21, 23, 25, 26, 46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL)) AND string1 = '1000' GROUP BY 1 ORDER BY 2" - ); - - @Param({"5000000"}) - private int rowsPerSegment; - - @Param({ - "false", - "force" - }) - private String vectorize; - - @Param({ - "explicit", - "auto" - }) - private String schema; + "SELECT string2, SUM(long1) FROM expressions WHERE string5 LIKE '%1%' AND string1 = '1000' GROUP BY 1 ORDER BY 2", + "SELECT string2, SUM(long1) FROM expressions WHERE string5 LIKE '%1%' AND (string3 in ('1', '10', '20', '22', '32') AND long2 IN (1, 19, 21, 23, 25, 26, 46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL)) AND string1 = '1000' GROUP BY 1 ORDER BY 2" + ); @Param({ "singleString", @@ -304,154 +204,26 @@ public class SqlExpressionBenchmark }) private String query; - private SqlEngine engine; - @Nullable - private PlannerFactory plannerFactory; - private Closer closer = Closer.create(); - - @Setup(Level.Trial) - public void setup() + @Override + public String getQuery() { - final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench"); - - final DataSegment dataSegment = DataSegment.builder() - .dataSource("foo") - .interval(schemaInfo.getDataInterval()) - .version("1") - .shardSpec(new LinearShardSpec(0)) - .size(0) - .build(); - - final PlannerConfig plannerConfig = new PlannerConfig(); - - final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); - log.info( - "Starting benchmark setup using cacheDir[%s], rows[%,d], schema[%s].", - segmentGenerator.getCacheDir(), - rowsPerSegment, - schema - ); - final QueryableIndex index; - if ("auto".equals(schema)) { - List columnSchemas = schemaInfo.getDimensionsSpec() - .getDimensions() - .stream() - .map(x -> new AutoTypeColumnSchema(x.getName(), null)) - .collect(Collectors.toList()); - index = segmentGenerator.generate( - dataSegment, - schemaInfo, - DimensionsSpec.builder().setDimensions(columnSchemas).build(), - TransformSpec.NONE, - IndexSpec.DEFAULT, - Granularities.NONE, - rowsPerSegment - ); - } 
else { - index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment); - } - - final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate( - closer, - PROCESSING_CONFIG - ); - - final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate).add( - dataSegment, - index - ); - closer.register(walker); - final ObjectMapper jsonMapper = CalciteTests.getJsonMapper(); - final DruidSchemaCatalog rootSchema = - CalciteTests.createMockRootSchema(conglomerate, walker, plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER); - engine = CalciteTests.createMockSqlEngine(walker, conglomerate); - plannerFactory = new PlannerFactory( - rootSchema, - CalciteTests.createOperatorTable(), - CalciteTests.createExprMacroTable(), - plannerConfig, - AuthTestUtils.TEST_AUTHORIZER_MAPPER, - jsonMapper, - CalciteTests.DRUID_SCHEMA_NAME, - new CalciteRulesManager(ImmutableSet.of()), - CalciteTests.createJoinableFactoryWrapper(), - CatalogResolver.NULL_RESOLVER, - new AuthConfig(), - new DruidHookDispatcher() - ); - - try { - SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries( - engine, - plannerFactory, - QUERIES.get(Integer.parseInt(query)) - ); - log.info("non-vectorized and vectorized results match"); - } - catch (Throwable ex) { - log.warn(ex, "non-vectorized and vectorized results do not match"); - } - - final String sql = QUERIES.get(Integer.parseInt(query)); - - try (final DruidPlanner planner = plannerFactory.createPlannerForTesting( - engine, - "EXPLAIN PLAN FOR " + sql, - ImmutableMap.of( - "useNativeQueryExplain", - true - ) - )) { - final PlannerResult plannerResult = planner.plan(); - final Sequence resultSequence = plannerResult.run().getResults(); - final Object[] planResult = resultSequence.toList().get(0); - log.info("Native query plan:\n" + - jsonMapper.writerWithDefaultPrettyPrinter() - .writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class)) - ); - } - catch (JsonProcessingException ex) { - log.warn(ex, "explain failed"); - } - - try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) { - final PlannerResult plannerResult = planner.plan(); - final Sequence resultSequence = plannerResult.run().getResults(); - final Yielder yielder = Yielders.each(resultSequence); - int rowCounter = 0; - while (!yielder.isDone()) { - rowCounter++; - yielder.next(yielder.get()); - } - log.info("Total result row count:" + rowCounter); - } - catch (Throwable ex) { - log.warn(ex, "failed to count rows"); - } + return QUERIES.get(Integer.parseInt(query)); } - @TearDown(Level.Trial) - public void tearDown() throws Exception + @Override + public List getDatasources() { - closer.close(); + return ImmutableList.of(SqlBenchmarkDatasets.EXPRESSIONS); } - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void querySql(Blackhole blackhole) + @Override + protected Map getContext() { final Map context = ImmutableMap.of( QueryContexts.VECTORIZE_KEY, vectorize, QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize, GroupByQueryConfig.CTX_KEY_DEFER_EXPRESSION_DIMENSIONS, deferExpressionDimensions ); - final String sql = QUERIES.get(Integer.parseInt(query)); - try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) { - final PlannerResult plannerResult = planner.plan(); - final Sequence resultSequence = plannerResult.run().getResults(); - 
final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in); - blackhole.consume(lastRow); - } + return context; } } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java index 1f58f97b8e8..fbb4d1c394d 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java @@ -19,109 +19,23 @@ package org.apache.druid.benchmark.query; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.data.input.impl.DimensionSchema; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.guice.BuiltInTypesModule; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.granularity.Granularities; -import org.apache.druid.java.util.common.guava.Sequence; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.math.expr.ExpressionProcessing; -import org.apache.druid.query.DruidProcessingConfig; -import org.apache.druid.query.QueryRunnerFactoryConglomerate; -import org.apache.druid.query.expression.TestExprMacroTable; -import org.apache.druid.segment.AutoTypeColumnSchema; -import org.apache.druid.segment.IndexSpec; -import org.apache.druid.segment.QueryableIndex; -import org.apache.druid.segment.column.StringEncodingStrategy; -import org.apache.druid.segment.generator.GeneratorBasicSchemas; -import org.apache.druid.segment.generator.GeneratorSchemaInfo; -import org.apache.druid.segment.generator.SegmentGenerator; -import org.apache.druid.segment.transform.ExpressionTransform; -import org.apache.druid.segment.transform.TransformSpec; -import org.apache.druid.server.QueryStackTests; -import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker; -import org.apache.druid.server.security.AuthConfig; -import org.apache.druid.server.security.AuthTestUtils; -import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest; -import org.apache.druid.sql.calcite.planner.CalciteRulesManager; -import org.apache.druid.sql.calcite.planner.CatalogResolver; -import org.apache.druid.sql.calcite.planner.DruidPlanner; -import org.apache.druid.sql.calcite.planner.PlannerConfig; -import org.apache.druid.sql.calcite.planner.PlannerFactory; -import org.apache.druid.sql.calcite.planner.PlannerResult; -import org.apache.druid.sql.calcite.run.SqlEngine; -import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog; -import org.apache.druid.sql.calcite.util.CalciteTests; -import org.apache.druid.sql.hook.DruidHookDispatcher; -import org.apache.druid.timeline.DataSegment; -import org.apache.druid.timeline.partition.LinearShardSpec; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.annotations.Warmup; 
-import org.openjdk.jmh.infra.Blackhole; -import javax.annotation.Nullable; import java.util.Collections; import java.util.List; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; @State(Scope.Benchmark) @Fork(value = 1) @Warmup(iterations = 3) @Measurement(iterations = 5) -public class SqlGroupByBenchmark +public class SqlGroupByBenchmark extends SqlBaseQueryBenchmark { - static { - NullHandling.initializeForTests(); - ExpressionProcessing.initializeForTests(); - BuiltInTypesModule.registerHandlersAndSerde(); - } - - private static final Logger log = new Logger(SqlGroupByBenchmark.class); - - private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig() - { - @Override - public int intermediateComputeSizeBytes() - { - return 512 * 1024 * 1024; - } - - @Override - public int getNumMergeBuffers() - { - return 3; - } - - @Override - public int getNumThreads() - { - return 1; - } - - @Override - public String getFormatString() - { - return "benchmarks-processing-%s"; - } - }; - @Param({ "string-Sequential-100_000", "string-Sequential-10_000_000", @@ -167,226 +81,16 @@ public class SqlGroupByBenchmark }) private String groupingDimension; - private SqlEngine engine; - @Nullable - private PlannerFactory plannerFactory; - private Closer closer = Closer.create(); - @Setup(Level.Trial) - public void setup() + @Override + public String getQuery() { - final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("groupBy-testbench"); - - final DataSegment dataSegment = DataSegment.builder() - .dataSource("foo") - .interval(schemaInfo.getDataInterval()) - .version("1") - .shardSpec(new LinearShardSpec(0)) - .size(0) - .build(); - final DataSegment dataSegment2 = DataSegment.builder() - .dataSource("foo") - .interval(schemaInfo.getDataInterval()) - .version("1") - .shardSpec(new LinearShardSpec(1)) - .size(0) - .build(); - - - final PlannerConfig plannerConfig = new PlannerConfig(); - - String columnCardinalityWithUnderscores = groupingDimension.substring(groupingDimension.lastIndexOf('-') + 1); - int rowsPerSegment = Integer.parseInt(StringUtils.replace(columnCardinalityWithUnderscores, "_", "")); - - final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); - - TransformSpec transformSpec = new TransformSpec( - null, - ImmutableList.of( - // string array dims - new ExpressionTransform( - "stringArray-Sequential-100_000", - "array(\"string-Sequential-100_000\")", - TestExprMacroTable.INSTANCE - ), - new ExpressionTransform( - "stringArray-Sequential-3_000_000", - "array(\"string-Sequential-10_000_000\")", - TestExprMacroTable.INSTANCE - ), - /* - new ExpressionTransform( - "stringArray-Sequential-1_000_000_000", - "array(\"string-Sequential-1_000_000_000\")", - TestExprMacroTable.INSTANCE - ),*/ - new ExpressionTransform( - "stringArray-ZipF-1_000_000", - "array(\"string-ZipF-1_000_000\")", - TestExprMacroTable.INSTANCE - ), - new ExpressionTransform( - "stringArray-Uniform-1_000_000", - "array(\"string-Uniform-1_000_000\")", - TestExprMacroTable.INSTANCE - ), - - // long array dims - new ExpressionTransform( - "longArray-Sequential-100_000", - "array(\"long-Sequential-100_000\")", - TestExprMacroTable.INSTANCE - ), - new ExpressionTransform( - "longArray-Sequential-3_000_000", - "array(\"long-Sequential-10_000_000\")", - TestExprMacroTable.INSTANCE - ), - /* - new ExpressionTransform( - "longArray-Sequential-1_000_000_000", - "array(\"long-Sequential-1_000_000_000\")", - TestExprMacroTable.INSTANCE - ),*/ - 
new ExpressionTransform( - "longArray-ZipF-1_000_000", - "array(\"long-ZipF-1_000_000\")", - TestExprMacroTable.INSTANCE - ), - new ExpressionTransform( - "longArray-Uniform-1_000_000", - "array(\"long-Uniform-1_000_000\")", - TestExprMacroTable.INSTANCE - ), - - // nested complex json dim - new ExpressionTransform( - "nested-Sequential-100_000", - "json_object('long1', \"long-Sequential-100_000\", 'nesteder', json_object('long1', \"long-Sequential-100_000\"))", - TestExprMacroTable.INSTANCE - ), - new ExpressionTransform( - "nested-Sequential-3_000_000", - "json_object('long1', \"long-Sequential-10_000_000\", 'nesteder', json_object('long1', \"long-Sequential-10_000_000\"))", - TestExprMacroTable.INSTANCE - ), - /*new ExpressionTransform( - "nested-Sequential-1_000_000_000", - "json_object('long1', \"long-Sequential-1_000_000_000\", 'nesteder', json_object('long1', \"long-Sequential-1_000_000_000\"))", - TestExprMacroTable.INSTANCE - ),*/ - new ExpressionTransform( - "nested-ZipF-1_000_000", - "json_object('long1', \"long-ZipF-1_000_000\", 'nesteder', json_object('long1', \"long-ZipF-1_000_000\"))", - TestExprMacroTable.INSTANCE - ), - new ExpressionTransform( - "nested-Uniform-1_000_000", - "json_object('long1', \"long-Uniform-1_000_000\", 'nesteder', json_object('long1', \"long-Uniform-1_000_000\"))", - TestExprMacroTable.INSTANCE - ) - ) - ); - - List columnSchemas = schemaInfo.getDimensionsSpec() - .getDimensions() - .stream() - .map(x -> new AutoTypeColumnSchema(x.getName(), null)) - .collect(Collectors.toList()); - - List transformSchemas = transformSpec - .getTransforms() - .stream() - .map( - transform -> new AutoTypeColumnSchema(transform.getName(), null) - ) - .collect(Collectors.toList()); - - - - final QueryableIndex index = segmentGenerator.generate( - dataSegment, - schemaInfo, - DimensionsSpec.builder() - .setDimensions(ImmutableList.builder() - .addAll(columnSchemas) - .addAll(transformSchemas) - .build() - ) - .build(), - transformSpec, - IndexSpec.builder().withStringDictionaryEncoding(new StringEncodingStrategy.Utf8()).build(), - Granularities.NONE, - rowsPerSegment - ); - - final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate( - closer, - PROCESSING_CONFIG - ); - - final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate) - .add(dataSegment, index) - .add(dataSegment2, index); - closer.register(walker); - - // Hacky and pollutes global namespace, but it is fine since benchmarks are run in isolation. Wasn't able - // to work up a cleaner way of doing it by modifying the injector. 
- CalciteTests.getJsonMapper().registerModules(BuiltInTypesModule.getJacksonModulesList()); - - final DruidSchemaCatalog rootSchema = - CalciteTests.createMockRootSchema(conglomerate, walker, plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER); - engine = CalciteTests.createMockSqlEngine(walker, conglomerate); - plannerFactory = new PlannerFactory( - rootSchema, - CalciteTests.createOperatorTable(), - CalciteTests.createExprMacroTable(), - plannerConfig, - AuthTestUtils.TEST_AUTHORIZER_MAPPER, - CalciteTests.getJsonMapper(), - CalciteTests.DRUID_SCHEMA_NAME, - new CalciteRulesManager(ImmutableSet.of()), - CalciteTests.createJoinableFactoryWrapper(), - CatalogResolver.NULL_RESOLVER, - new AuthConfig(), - new DruidHookDispatcher() - ); - - try { - SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries( - engine, - plannerFactory, - sqlQuery(groupingDimension) - ); - log.info("non-vectorized and vectorized results match"); - } - catch (Throwable ex) { - log.warn(ex, "non-vectorized and vectorized results do not match"); - } + return StringUtils.format("SELECT \"%s\", COUNT(*) FROM druid.%s GROUP BY 1", groupingDimension, SqlBenchmarkDatasets.GROUPER); } - @TearDown(Level.Trial) - public void tearDown() throws Exception + @Override + public List getDatasources() { - closer.close(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void querySql(Blackhole blackhole) - { - final String sql = sqlQuery(groupingDimension); - try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, Collections.emptyMap())) { - final PlannerResult plannerResult = planner.plan(); - final Sequence resultSequence = plannerResult.run().getResults(); - final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in); - blackhole.consume(lastRow); - } - } - - private static String sqlQuery(String groupingDimension) - { - return StringUtils.format("SELECT \"%s\", COUNT(*) FROM foo GROUP BY 1", groupingDimension); + return Collections.singletonList(SqlBenchmarkDatasets.GROUPER); } } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java index fa5942dc2c9..73fc97dd78a 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java @@ -19,224 +19,109 @@ package org.apache.druid.benchmark.query; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterables; -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.data.input.impl.DimensionSchema; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.java.util.common.granularity.Granularities; -import org.apache.druid.java.util.common.guava.Sequence; -import org.apache.druid.java.util.common.guava.Yielder; -import org.apache.druid.java.util.common.guava.Yielders; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.math.expr.ExpressionProcessing; -import org.apache.druid.query.DruidProcessingConfig; -import org.apache.druid.query.QueryContexts; -import 
org.apache.druid.query.QueryRunnerFactoryConglomerate; -import org.apache.druid.query.expression.TestExprMacroTable; -import org.apache.druid.segment.AutoTypeColumnSchema; -import org.apache.druid.segment.IndexSpec; -import org.apache.druid.segment.QueryableIndex; -import org.apache.druid.segment.column.StringEncodingStrategy; -import org.apache.druid.segment.data.FrontCodedIndexed; -import org.apache.druid.segment.generator.GeneratorBasicSchemas; -import org.apache.druid.segment.generator.GeneratorSchemaInfo; -import org.apache.druid.segment.generator.SegmentGenerator; -import org.apache.druid.segment.transform.ExpressionTransform; -import org.apache.druid.segment.transform.TransformSpec; -import org.apache.druid.server.QueryStackTests; -import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker; -import org.apache.druid.server.security.AuthConfig; -import org.apache.druid.server.security.AuthTestUtils; -import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest; -import org.apache.druid.sql.calcite.planner.CalciteRulesManager; -import org.apache.druid.sql.calcite.planner.CatalogResolver; -import org.apache.druid.sql.calcite.planner.DruidPlanner; -import org.apache.druid.sql.calcite.planner.PlannerConfig; -import org.apache.druid.sql.calcite.planner.PlannerFactory; -import org.apache.druid.sql.calcite.planner.PlannerResult; -import org.apache.druid.sql.calcite.run.SqlEngine; -import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog; -import org.apache.druid.sql.calcite.util.CalciteTests; -import org.apache.druid.sql.hook.DruidHookDispatcher; -import org.apache.druid.timeline.DataSegment; -import org.apache.druid.timeline.partition.LinearShardSpec; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.annotations.Warmup; -import org.openjdk.jmh.infra.Blackhole; -import javax.annotation.Nullable; -import java.util.Collections; import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; @State(Scope.Benchmark) @Fork(value = 1) @Warmup(iterations = 3) @Measurement(iterations = 5) -public class SqlNestedDataBenchmark +public class SqlNestedDataBenchmark extends SqlBaseQueryBenchmark { - private static final Logger log = new Logger(SqlNestedDataBenchmark.class); - - static { - NullHandling.initializeForTests(); - ExpressionProcessing.initializeForTests(); - } - - private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig() - { - @Override - public int intermediateComputeSizeBytes() - { - return 512 * 1024 * 1024; - } - - @Override - public int getNumMergeBuffers() - { - return 3; - } - - @Override - public int getNumThreads() - { - return 1; - } - - @Override - public String getFormatString() - { - return "benchmarks-processing-%s"; - } - }; - - private static final List QUERIES = ImmutableList.of( // =========================== // non-nested reference queries // =========================== // 0,1: timeseries, 1 columns - "SELECT SUM(long1) FROM foo", - "SELECT SUM(JSON_VALUE(nested, 
'$.long1' RETURNING BIGINT)) FROM foo", + "SELECT SUM(long1) FROM druid.nested", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested", // 2,3: timeseries, 2 columns - "SELECT SUM(long1), SUM(long2) FROM foo", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT)) FROM foo", + "SELECT SUM(long1), SUM(long2) FROM druid.nested", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT)) FROM druid.nested", // 4,5: timeseries, 3 columns - "SELECT SUM(long1), SUM(long2), SUM(double3) FROM foo", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM foo", + "SELECT SUM(long1), SUM(long2), SUM(double3) FROM druid.nested", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM druid.nested", // 6,7: group by string with 1 agg - "SELECT string1, SUM(long1) FROM foo GROUP BY 1 ORDER BY 2", - "SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT string1, SUM(long1) FROM druid.nested GROUP BY 1 ORDER BY 2", + "SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested GROUP BY 1 ORDER BY 2", // 8,9: group by string with 2 agg - "SELECT string1, SUM(long1), SUM(double3) FROM foo GROUP BY 1 ORDER BY 2", - "SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM foo GROUP BY 1 ORDER BY 2", + "SELECT string1, SUM(long1), SUM(double3) FROM druid.nested GROUP BY 1 ORDER BY 2", + "SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM druid.nested GROUP BY 1 ORDER BY 2", // 10,11: time-series filter string - "SELECT SUM(long1) FROM foo WHERE string1 = '10000' OR string1 = '1000'", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string1') = '10000' OR JSON_VALUE(nested, '$.nesteder.string1') = '1000'", + "SELECT SUM(long1) FROM druid.nested WHERE string1 = '10000' OR string1 = '1000'", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string1') = '10000' OR JSON_VALUE(nested, '$.nesteder.string1') = '1000'", // 12,13: time-series filter long - "SELECT SUM(long1) FROM foo WHERE long2 = 10000 OR long2 = 1000", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) = 10000 OR JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) = 1000", + "SELECT SUM(long1) FROM druid.nested WHERE long2 = 10000 OR long2 = 1000", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) = 10000 OR JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) = 1000", // 14,15: time-series filter double - "SELECT SUM(long1) FROM foo WHERE double3 < 10000.0 AND double3 > 1000.0", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo 
WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 10000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0", + "SELECT SUM(long1) FROM druid.nested WHERE double3 < 10000.0 AND double3 > 1000.0", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 10000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0", // 16,17: group by long filter by string - "SELECT long1, SUM(double3) FROM foo WHERE string1 = '10000' OR string1 = '1000' GROUP BY 1 ORDER BY 2", - "SELECT JSON_VALUE(nested, '$.long1' RETURNING BIGINT), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string1') = '10000' OR JSON_VALUE(nested, '$.nesteder.string1') = '1000' GROUP BY 1 ORDER BY 2", + "SELECT long1, SUM(double3) FROM druid.nested WHERE string1 = '10000' OR string1 = '1000' GROUP BY 1 ORDER BY 2", + "SELECT JSON_VALUE(nested, '$.long1' RETURNING BIGINT), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string1') = '10000' OR JSON_VALUE(nested, '$.nesteder.string1') = '1000' GROUP BY 1 ORDER BY 2", // 18,19: group by string filter by long - "SELECT string1, SUM(double3) FROM foo WHERE long2 < 10000 AND long2 > 1000 GROUP BY 1 ORDER BY 2", - "SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) < 10000 AND JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) > 1000 GROUP BY 1 ORDER BY 2", + "SELECT string1, SUM(double3) FROM druid.nested WHERE long2 < 10000 AND long2 > 1000 GROUP BY 1 ORDER BY 2", + "SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) < 10000 AND JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) > 1000 GROUP BY 1 ORDER BY 2", // 20,21: group by string filter by double - "SELECT string1, SUM(double3) FROM foo WHERE double3 < 10000.0 AND double3 > 1000.0 GROUP BY 1 ORDER BY 2", - "SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 10000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0 GROUP BY 1 ORDER BY 2", + "SELECT string1, SUM(double3) FROM druid.nested WHERE double3 < 10000.0 AND double3 > 1000.0 GROUP BY 1 ORDER BY 2", + "SELECT JSON_VALUE(nested, '$.nesteder.string1'), SUM(JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 10000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0 GROUP BY 1 ORDER BY 2", // 22, 23: - "SELECT long2 FROM foo WHERE long2 IN (1, 19, 21, 23, 25, 26, 46)", - "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46)", + "SELECT long2 FROM druid.nested WHERE long2 IN (1, 19, 21, 23, 25, 26, 46)", + "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46)", // 24, 25 - "SELECT long2 FROM foo WHERE long2 IN (1, 19, 21, 23, 25, 26, 46) GROUP 
BY 1", - "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46) GROUP BY 1", + "SELECT long2 FROM druid.nested WHERE long2 IN (1, 19, 21, 23, 25, 26, 46) GROUP BY 1", + "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46) GROUP BY 1", // 26, 27 - "SELECT SUM(long1) FROM foo WHERE double3 < 1005.0 AND double3 > 1000.0", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 1005.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0", + "SELECT SUM(long1) FROM druid.nested WHERE double3 < 1005.0 AND double3 > 1000.0", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 1005.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0", // 28, 29 - "SELECT SUM(long1) FROM foo WHERE double3 < 2000.0 AND double3 > 1000.0", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 2000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0", + "SELECT SUM(long1) FROM druid.nested WHERE double3 < 2000.0 AND double3 > 1000.0", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 2000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0", // 30, 31 - "SELECT SUM(long1) FROM foo WHERE double3 < 3000.0 AND double3 > 1000.0", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 3000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0", + "SELECT SUM(long1) FROM druid.nested WHERE double3 < 3000.0 AND double3 > 1000.0", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 3000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0", // 32,33 - "SELECT SUM(long1) FROM foo WHERE double3 < 5000.0 AND double3 > 1000.0", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 5000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0", + "SELECT SUM(long1) FROM druid.nested WHERE double3 < 5000.0 AND double3 > 1000.0", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 5000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0", // 34,35 smaller cardinality like range filter - "SELECT SUM(long1) FROM foo WHERE string1 LIKE '1%'", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string1') LIKE '1%'", + "SELECT SUM(long1) FROM druid.nested WHERE string1 LIKE '1%'", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string1') LIKE '1%'", // 36,37 smaller cardinality like predicate filter - "SELECT SUM(long1) FROM foo WHERE string1 LIKE '%1%'", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE 
JSON_VALUE(nested, '$.nesteder.string1') LIKE '%1%'", + "SELECT SUM(long1) FROM druid.nested WHERE string1 LIKE '%1%'", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string1') LIKE '%1%'", // 38-39 moderate cardinality like range - "SELECT SUM(long1) FROM foo WHERE string5 LIKE '1%'", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '1%'", + "SELECT SUM(long1) FROM druid.nested WHERE string5 LIKE '1%'", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '1%'", // 40, 41 big cardinality lex range - "SELECT SUM(long1) FROM foo WHERE string5 > '1'", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') > '1'", + "SELECT SUM(long1) FROM druid.nested WHERE string5 > '1'", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string5') > '1'", // 42, 43 big cardinality like predicate filter - "SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%'", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'", + "SELECT SUM(long1) FROM druid.nested WHERE string5 LIKE '%1%'", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'", // 44, 45 big cardinality like filter + selector filter with different ordering - "SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND string1 = '1000'", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%' AND JSON_VALUE(nested, '$.nesteder.string1') = '1000'", - "SELECT SUM(long1) FROM foo WHERE string1 = '1000' AND string5 LIKE '%1%'", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string1') = '1000' AND JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'", + "SELECT SUM(long1) FROM druid.nested WHERE string5 LIKE '%1%' AND string1 = '1000'", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%' AND JSON_VALUE(nested, '$.nesteder.string1') = '1000'", + "SELECT SUM(long1) FROM druid.nested WHERE string1 = '1000' AND string5 LIKE '%1%'", + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.string1') = '1000' AND JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'", //48,49 bigger in - "SELECT long2 FROM foo WHERE long2 IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204)", - "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204)", + "SELECT long2 FROM druid.nested WHERE long2 IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 
95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204)", + "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204)", //50, 51 bigger in group - "SELECT long2 FROM foo WHERE long2 IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204) GROUP BY 1", - "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204) GROUP BY 1", - "SELECT long2 FROM foo WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)", - "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)", - "SELECT long2 FROM foo WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1", - "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1" + "SELECT long2 FROM druid.nested WHERE long2 IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204) GROUP BY 1", + "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE 
JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204) GROUP BY 1", + "SELECT long2 FROM druid.nested WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)", + "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)", + "SELECT long2 FROM druid.nested WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1", + "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1" ); - @Param({"5000000"}) - private int rowsPerSegment; - - @Param({ - "false", - "force" - }) - private String vectorize; - - @Param({ - "none", - "front-coded-4", - "front-coded-16" - }) - private String stringEncoding; - - @Param({ - "explicit", - "auto" - }) - private String schema; @Param({ "0", @@ -298,179 +183,15 @@ public class SqlNestedDataBenchmark }) private String query; - private SqlEngine engine; - @Nullable - private PlannerFactory plannerFactory; - private final Closer closer = Closer.create(); - - @Setup(Level.Trial) - public void setup() + @Override + public String getQuery() { - final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench"); - - final DataSegment dataSegment = DataSegment.builder() - .dataSource("foo") - .interval(schemaInfo.getDataInterval()) - .version("1") - .shardSpec(new LinearShardSpec(0)) - .size(0) - .build(); - - - final PlannerConfig plannerConfig = new PlannerConfig(); - - final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); - log.info("Starting benchmark setup using cacheDir[%s], rows[%,d].", segmentGenerator.getCacheDir(), rowsPerSegment); - - TransformSpec transformSpec = new TransformSpec( - null, - ImmutableList.of( - new ExpressionTransform( - "nested", - "json_object('long1', long1, 'nesteder', 
json_object('string1', string1, 'long2', long2, 'double3',double3, 'string5', string5))", - TestExprMacroTable.INSTANCE - ) - ) - ); - - - - StringEncodingStrategy encodingStrategy; - if (stringEncoding.startsWith("front-coded")) { - String[] split = stringEncoding.split("-"); - int bucketSize = Integer.parseInt(split[2]); - encodingStrategy = new StringEncodingStrategy.FrontCoded(bucketSize, FrontCodedIndexed.V1); - } else { - encodingStrategy = new StringEncodingStrategy.Utf8(); - } - final QueryableIndex index; - if ("auto".equals(schema)) { - Iterable columnSchemas = Iterables.concat( - schemaInfo.getDimensionsSpec() - .getDimensions() - .stream() - .map(x -> new AutoTypeColumnSchema(x.getName(), null)) - .collect(Collectors.toList()), - Collections.singletonList(new AutoTypeColumnSchema("nested", null)) - ); - index = segmentGenerator.generate( - dataSegment, - schemaInfo, - DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(), - transformSpec, - IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(), - Granularities.NONE, - rowsPerSegment - ); - } else { - Iterable columnSchemas = Iterables.concat( - schemaInfo.getDimensionsSpec().getDimensions(), - Collections.singletonList(new AutoTypeColumnSchema("nested", null)) - ); - index = segmentGenerator.generate( - dataSegment, - schemaInfo, - DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(), - transformSpec, - IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(), - Granularities.NONE, - rowsPerSegment - ); - } - - final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate( - closer, - PROCESSING_CONFIG - ); - - final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate).add( - dataSegment, - index - ); - closer.register(walker); - - final DruidSchemaCatalog rootSchema = - CalciteTests.createMockRootSchema(conglomerate, walker, plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER); - engine = CalciteTests.createMockSqlEngine(walker, conglomerate); - plannerFactory = new PlannerFactory( - rootSchema, - CalciteTests.createOperatorTable(), - CalciteTests.createExprMacroTable(), - plannerConfig, - AuthTestUtils.TEST_AUTHORIZER_MAPPER, - CalciteTests.getJsonMapper(), - CalciteTests.DRUID_SCHEMA_NAME, - new CalciteRulesManager(ImmutableSet.of()), - CalciteTests.createJoinableFactoryWrapper(), - CatalogResolver.NULL_RESOLVER, - new AuthConfig(), - new DruidHookDispatcher() - ); - - try { - SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries( - engine, - plannerFactory, - QUERIES.get(Integer.parseInt(query)) - ); - log.info("non-vectorized and vectorized results match"); - } - catch (Throwable ex) { - log.warn(ex, "non-vectorized and vectorized results do not match"); - } - - final String sql = QUERIES.get(Integer.parseInt(query)); - final ObjectMapper jsonMapper = CalciteTests.getJsonMapper(); - try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) { - final PlannerResult plannerResult = planner.plan(); - final Sequence resultSequence = plannerResult.run().getResults(); - final Object[] planResult = resultSequence.toList().get(0); - log.info("Native query plan:\n" + - jsonMapper.writerWithDefaultPrettyPrinter() - .writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class)) - ); - } - catch 
(JsonProcessingException ex) { - log.warn(ex, "explain failed"); - } - - try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) { - final PlannerResult plannerResult = planner.plan(); - final Sequence resultSequence = plannerResult.run().getResults(); - final Yielder yielder = Yielders.each(resultSequence); - int rowCounter = 0; - while (!yielder.isDone()) { - rowCounter++; - yielder.next(yielder.get()); - } - log.info("Total result row count:" + rowCounter); - } - catch (Throwable ex) { - log.warn(ex, "failed to count rows"); - } + return QUERIES.get(Integer.parseInt(query)); } - @TearDown(Level.Trial) - public void tearDown() throws Exception + @Override + public List getDatasources() { - closer.close(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void querySql(Blackhole blackhole) - { - final Map context = ImmutableMap.of( - QueryContexts.VECTORIZE_KEY, vectorize, - QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize - ); - final String sql = QUERIES.get(Integer.parseInt(query)); - try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) { - final PlannerResult plannerResult = planner.plan(); - final Sequence resultSequence = plannerResult.run().getResults(); - final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in); - blackhole.consume(lastRow); - } + return ImmutableList.of(SqlBenchmarkDatasets.NESTED); } } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlPlanBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlPlanBenchmark.java new file mode 100644 index 00000000000..29e9cb2c24f --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlPlanBenchmark.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.druid.benchmark.query;
+
+import com.google.common.collect.ImmutableList;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.util.List;
+
+@State(Scope.Benchmark)
+@Fork(value = 1)
+@Warmup(iterations = 3)
+@Measurement(iterations = 5)
+public class SqlPlanBenchmark extends SqlBasePlanBenchmark
+{
+  @Param({
+      "0",
+      "1",
+      "2",
+      "3",
+      "4",
+      "5",
+      "6",
+      "7",
+      "8",
+      "9",
+      "10",
+      "11",
+      "12",
+      "13",
+      "14",
+      "15",
+      "16",
+      "17",
+      "18",
+      "19",
+      "20",
+      "21",
+      "22",
+      "23",
+      "24",
+      "25",
+      "26",
+      "27",
+      "28",
+      "29",
+      "30",
+      "31",
+      "32",
+      "33",
+      "34",
+      "35",
+      "36",
+      "37",
+      "38",
+      "39",
+      "40",
+      "41"
+  })
+  private String query;
+
+  @Override
+  public String getQuery()
+  {
+    return SqlBenchmark.QUERIES.get(Integer.parseInt(query));
+  }
+
+  @Override
+  public List getDatasources()
+  {
+    return ImmutableList.of(SqlBenchmarkDatasets.BASIC);
+  }
+}
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlProjectionsBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlProjectionsBenchmark.java
new file mode 100644
index 00000000000..78b56eadf36
--- /dev/null
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlProjectionsBenchmark.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.benchmark.query;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.druid.query.QueryContexts;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.util.List;
+import java.util.Map;
+
+@State(Scope.Benchmark)
+@Fork(value = 1)
+@Warmup(iterations = 3)
+@Measurement(iterations = 5)
+public class SqlProjectionsBenchmark extends SqlBaseQueryBenchmark
+{
+  private static final List QUERIES = ImmutableList.of(
+      "SELECT string2, APPROX_COUNT_DISTINCT_DS_HLL(string5) FROM druid.projections GROUP BY 1 ORDER BY 2",
+      "SELECT string2, SUM(long4) FROM druid.projections GROUP BY 1 ORDER BY 2"
+  );
+
+  @Param({
+      "0",
+      "1"
+  })
+  private String query;
+
+  @Param({
+      "true",
+      "false"
+  })
+  private boolean useProjections;
+
+  @Override
+  public String getQuery()
+  {
+    return QUERIES.get(Integer.parseInt(query));
+  }
+
+  @Override
+  public List getDatasources()
+  {
+    return ImmutableList.of(SqlBenchmarkDatasets.PROJECTIONS);
+  }
+
+  @Override
+  protected Map getContext()
+  {
+    final Map context = ImmutableMap.of(
+        QueryContexts.VECTORIZE_KEY, vectorize,
+        QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize,
+        useProjections ? QueryContexts.FORCE_PROJECTION : QueryContexts.NO_PROJECTIONS, true
+    );
+    return context;
+  }
+}
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlWindowFunctionsBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlWindowFunctionsBenchmark.java
index b5398ead4c9..550dad624f1 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlWindowFunctionsBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlWindowFunctionsBenchmark.java
@@ -19,98 +19,22 @@ package org.apache.druid.benchmark.query;
-import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.ImmutableSet;
-import com.google.inject.Injector;
-import com.google.inject.Key;
-import com.google.inject.multibindings.MapBinder;
-import org.apache.druid.common.config.NullHandling;
-import org.apache.druid.data.input.impl.DimensionSchema;
-import org.apache.druid.data.input.impl.DimensionsSpec;
-import org.apache.druid.frame.FrameType;
-import org.apache.druid.frame.testutil.FrameTestUtil;
-import org.apache.druid.guice.ExpressionModule;
-import org.apache.druid.guice.LazySingleton;
-import org.apache.druid.guice.SegmentWranglerModule;
-import org.apache.druid.guice.StartupInjectorBuilder;
-import org.apache.druid.guice.annotations.Json;
-import org.apache.druid.java.util.common.IAE;
-import org.apache.druid.java.util.common.Pair;
-import org.apache.druid.java.util.common.granularity.Granularities;
-import org.apache.druid.java.util.common.guava.Sequence;
-import org.apache.druid.java.util.common.io.Closer;
-import org.apache.druid.java.util.common.logger.Logger;
-import org.apache.druid.math.expr.ExprMacroTable;
-import org.apache.druid.query.DruidProcessingConfig;
 import org.apache.druid.query.QueryContexts;
-import org.apache.druid.query.QueryRunnerFactoryConglomerate;
-import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctSqlAggregator;
-import
org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctUtf8SqlAggregator; -import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchApproxQuantileSqlAggregator; -import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchObjectSqlAggregator; -import org.apache.druid.query.aggregation.datasketches.theta.sql.ThetaSketchApproxCountDistinctSqlAggregator; -import org.apache.druid.query.lookup.LookupExtractor; -import org.apache.druid.segment.AutoTypeColumnSchema; -import org.apache.druid.segment.IndexSpec; -import org.apache.druid.segment.QueryableIndex; -import org.apache.druid.segment.QueryableIndexCursorFactory; -import org.apache.druid.segment.QueryableIndexSegment; -import org.apache.druid.segment.column.StringEncodingStrategy; -import org.apache.druid.segment.generator.GeneratorBasicSchemas; -import org.apache.druid.segment.generator.GeneratorSchemaInfo; -import org.apache.druid.segment.generator.SegmentGenerator; -import org.apache.druid.segment.join.JoinableFactoryWrapper; -import org.apache.druid.segment.transform.TransformSpec; -import org.apache.druid.server.QueryStackTests; -import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker; -import org.apache.druid.server.security.AuthConfig; -import org.apache.druid.server.security.AuthTestUtils; -import org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator; -import org.apache.druid.sql.calcite.aggregation.SqlAggregationModule; -import org.apache.druid.sql.calcite.aggregation.SqlAggregator; -import org.apache.druid.sql.calcite.aggregation.builtin.CountSqlAggregator; -import org.apache.druid.sql.calcite.expression.SqlOperatorConversion; -import org.apache.druid.sql.calcite.expression.builtin.QueryLookupOperatorConversion; -import org.apache.druid.sql.calcite.planner.CalciteRulesManager; -import org.apache.druid.sql.calcite.planner.CatalogResolver; -import org.apache.druid.sql.calcite.planner.DruidOperatorTable; -import org.apache.druid.sql.calcite.planner.DruidPlanner; -import org.apache.druid.sql.calcite.planner.PlannerConfig; -import org.apache.druid.sql.calcite.planner.PlannerFactory; -import org.apache.druid.sql.calcite.planner.PlannerResult; -import org.apache.druid.sql.calcite.run.SqlEngine; -import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog; -import org.apache.druid.sql.calcite.util.CalciteTests; -import org.apache.druid.sql.calcite.util.LookylooModule; -import org.apache.druid.sql.calcite.util.QueryFrameworkUtils; -import org.apache.druid.sql.calcite.util.testoperator.CalciteTestOperatorModule; -import org.apache.druid.sql.hook.DruidHookDispatcher; -import org.apache.druid.timeline.DataSegment; -import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.annotations.Warmup; -import org.openjdk.jmh.infra.Blackhole; -import javax.annotation.Nullable; -import java.util.Collections; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.TimeUnit; -import 
java.util.stream.Collectors; /** * Benchmark that tests various SQL queries. @@ -121,319 +45,55 @@ import java.util.stream.Collectors; @Fork(value = 1) @Warmup(iterations = 3) @Measurement(iterations = 5) -public class SqlWindowFunctionsBenchmark +public class SqlWindowFunctionsBenchmark extends SqlBaseQueryBenchmark { - static { - NullHandling.initializeForTests(); - } + private static final List QUERIES = ImmutableList.of( + "SELECT SUM(dimSequentialHalfNull) FROM druid.basic GROUP BY dimUniform", + "SELECT SUM(SUM(dimSequentialHalfNull)) OVER (ORDER BY dimUniform) FROM druid.basic GROUP BY dimUniform", + "SELECT ROW_NUMBER() OVER (PARTITION BY dimUniform ORDER BY dimSequential) FROM druid.basic", + "SELECT COUNT(*) OVER (PARTITION BY dimUniform RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM druid.basic", + "SELECT COUNT(*) OVER (PARTITION BY dimUniform ORDER BY dimSequential RANGE UNBOUNDED PRECEDING) FROM druid.basic", + "SELECT COUNT(*) OVER (PARTITION BY dimUniform ORDER BY dimSequential RANGE UNBOUNDED FOLLOWING) FROM druid.basic", + "SELECT COUNT(*) OVER (PARTITION BY dimUniform ORDER BY dimSequential) FROM druid.basic GROUP BY dimSequential, dimUniform", + "SELECT COUNT(*) OVER (PARTITION BY dimUniform ORDER BY dimSequential) FROM druid.basic GROUP BY dimUniform, dimSequential", + "SELECT SUM(dimSequentialHalfNull) + SUM(dimZipf), LAG(SUM(dimSequentialHalfNull + dimZipf)) OVER (PARTITION BY dimUniform ORDER BY dimSequential) FROM druid.basic GROUP BY __time, dimUniform, dimSequential" + ); - private static final Logger log = new Logger(SqlWindowFunctionsBenchmark.class); - - private static final String STORAGE_MMAP = "mmap"; - private static final String STORAGE_FRAME_ROW = "frame-row"; - private static final String STORAGE_FRAME_COLUMNAR = "frame-columnar"; - - @Param({"2000000"}) - private int rowsPerSegment; @Param({ - "auto" + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8" }) - private String schema; + private String query; - // Can be STORAGE_MMAP, STORAGE_FRAME_ROW, or STORAGE_FRAME_COLUMNAR - @Param({STORAGE_MMAP}) - private String storageType; - private SqlEngine engine; - - @Nullable - private PlannerFactory plannerFactory; - private final Closer closer = Closer.create(); - - private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig() { - - @Override - public int getNumMergeBuffers() - { - return 3; - } - - @Override - public int intermediateComputeSizeBytes() - { - return 200_000_000; - } - }; - - @Setup(Level.Trial) - public void setup() + @Override + public String getQuery() { - final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("basic"); - final DataSegment dataSegment = schemaInfo.makeSegmentDescriptor("foo"); - final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); - - log.info("Starting benchmark setup using cacheDir[%s], rows[%,d].", segmentGenerator.getCacheDir(), rowsPerSegment); - final QueryableIndex index; - if ("auto".equals(schema)) { - List columnSchemas = schemaInfo.getDimensionsSpec() - .getDimensions() - .stream() - .map(x -> new AutoTypeColumnSchema(x.getName(), null)) - .collect(Collectors.toList()); - index = segmentGenerator.generate( - dataSegment, - schemaInfo, - DimensionsSpec.builder().setDimensions(columnSchemas).build(), - TransformSpec.NONE, - IndexSpec.builder().withStringDictionaryEncoding(getStringEncodingStrategy()).build(), - Granularities.NONE, - rowsPerSegment - ); - } else { - index = segmentGenerator.generate(dataSegment, 
schemaInfo, Granularities.NONE, rowsPerSegment); - } - - final Pair sqlSystem = createSqlSystem( - ImmutableMap.of(dataSegment, index), - Collections.emptyMap(), - null, - closer - ); - - plannerFactory = sqlSystem.lhs; - engine = sqlSystem.rhs; + return QUERIES.get(Integer.parseInt(query)); } - private StringEncodingStrategy getStringEncodingStrategy() + @Override + public List getDatasources() { - return new StringEncodingStrategy.Utf8(); + return ImmutableList.of(SqlBenchmarkDatasets.BASIC); } - public static Pair createSqlSystem( - final Map segmentMap, - final Map lookupMap, - @Nullable final String storageType, - final Closer closer - ) + @Override + protected Map getContext() { - final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(closer, PROCESSING_CONFIG); - final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate); - final PlannerConfig plannerConfig = new PlannerConfig(); - - for (final Map.Entry segmentEntry : segmentMap.entrySet()) { - addSegmentToWalker(walker, segmentEntry.getKey(), segmentEntry.getValue(), storageType); - } - - // Child injector that adds additional lookups. - final Injector injector = new StartupInjectorBuilder() - .withEmptyProperties() - .add( - new ExpressionModule(), - new SegmentWranglerModule(), - new LookylooModule(), - new SqlAggregationModule(), - new CalciteTestOperatorModule(), - binder -> { - for (Map.Entry entry : lookupMap.entrySet()) { - MapBinder.newMapBinder(binder, String.class, LookupExtractor.class) - .addBinding(entry.getKey()) - .toProvider(entry::getValue) - .in(LazySingleton.class); - } - } - ) - .build(); - - final DruidSchemaCatalog rootSchema = - QueryFrameworkUtils.createMockRootSchema( - injector, - conglomerate, - walker, - plannerConfig, - AuthTestUtils.TEST_AUTHORIZER_MAPPER - ); - - final SqlEngine engine = CalciteTests.createMockSqlEngine(walker, conglomerate); - - final PlannerFactory plannerFactory = new PlannerFactory( - rootSchema, - createOperatorTable(injector), - injector.getInstance(ExprMacroTable.class), - plannerConfig, - AuthTestUtils.TEST_AUTHORIZER_MAPPER, - injector.getInstance(Key.get(ObjectMapper.class, Json.class)), - CalciteTests.DRUID_SCHEMA_NAME, - new CalciteRulesManager(ImmutableSet.of()), - new JoinableFactoryWrapper(QueryFrameworkUtils.createDefaultJoinableFactory(injector)), - CatalogResolver.NULL_RESOLVER, - new AuthConfig(), - new DruidHookDispatcher() - ); - - return Pair.of(plannerFactory, engine); - } - - private static void addSegmentToWalker( - final SpecificSegmentsQuerySegmentWalker walker, - final DataSegment descriptor, - final QueryableIndex index, - @Nullable final String storageType - ) - { - if (storageType == null || STORAGE_MMAP.equals(storageType)) { - walker.add(descriptor, new QueryableIndexSegment(index, descriptor.getId())); - } else if (STORAGE_FRAME_ROW.equals(storageType)) { - walker.add( - descriptor, - FrameTestUtil.cursorFactoryToFrameSegment( - new QueryableIndexCursorFactory(index), - FrameType.ROW_BASED, - descriptor.getId() - ) - ); - } else if (STORAGE_FRAME_COLUMNAR.equals(storageType)) { - walker.add( - descriptor, - FrameTestUtil.cursorFactoryToFrameSegment( - new QueryableIndexCursorFactory(index), - FrameType.COLUMNAR, - descriptor.getId() - ) - ); - } else { - throw new IAE("Invalid storageType[%s]", storageType); - } - } - - private static DruidOperatorTable createOperatorTable(final Injector injector) - { - try { - final Set extractionOperators = new 
HashSet<>(); - extractionOperators.add(injector.getInstance(QueryLookupOperatorConversion.class)); - final ApproxCountDistinctSqlAggregator countDistinctSqlAggregator = - new ApproxCountDistinctSqlAggregator(new HllSketchApproxCountDistinctSqlAggregator()); - final Set aggregators = new HashSet<>( - ImmutableList.of( - new DoublesSketchApproxQuantileSqlAggregator(), - new DoublesSketchObjectSqlAggregator(), - new HllSketchApproxCountDistinctSqlAggregator(), - new HllSketchApproxCountDistinctUtf8SqlAggregator(), - new ThetaSketchApproxCountDistinctSqlAggregator(), - new CountSqlAggregator(countDistinctSqlAggregator), - countDistinctSqlAggregator - ) - ); - return new DruidOperatorTable(aggregators, extractionOperators); - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - - @TearDown(Level.Trial) - public void tearDown() throws Exception - { - closer.close(); - } - - public void querySql(String sql, Blackhole blackhole) - { - final Map context = ImmutableMap.of( + return ImmutableMap.of( QueryContexts.MAX_SUBQUERY_BYTES_KEY, "disabled", - QueryContexts.MAX_SUBQUERY_ROWS_KEY, -1 + QueryContexts.MAX_SUBQUERY_ROWS_KEY, -1, + QueryContexts.VECTORIZE_KEY, vectorize, + QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize ); - try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) { - final PlannerResult plannerResult = planner.plan(); - final Sequence resultSequence = plannerResult.run().getResults(); - final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in); - blackhole.consume(lastRow); - } - } - - @Benchmark - public void groupByWithoutWindow(Blackhole blackhole) - { - String sql = "SELECT SUM(dimSequentialHalfNull) " - + "FROM foo " - + "GROUP BY dimUniform"; - querySql(sql, blackhole); - } - - @Benchmark - public void groupByWithWindow(Blackhole blackhole) - { - String sql = "SELECT SUM(SUM(dimSequentialHalfNull)) " - + "OVER (ORDER BY dimUniform) " - + "FROM foo " - + "GROUP BY dimUniform"; - querySql(sql, blackhole); - } - - @Benchmark - public void simpleWindow(Blackhole blackhole) - { - String sql = "SELECT ROW_NUMBER() " - + "OVER (PARTITION BY dimUniform ORDER BY dimSequential) " - + "FROM foo"; - querySql(sql, blackhole); - } - - @Benchmark - public void simpleWindowUnbounded(Blackhole blackhole) - { - String sql = "SELECT COUNT(*) " - + "OVER (PARTITION BY dimUniform RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) " - + "FROM foo"; - querySql(sql, blackhole); - } - - @Benchmark - public void windowTillCurrentRow(Blackhole blackhole) - { - String sql = "SELECT COUNT(*) " - + "OVER (PARTITION BY dimUniform ORDER BY dimSequential RANGE UNBOUNDED PRECEDING) " - + "FROM foo"; - querySql(sql, blackhole); - } - - @Benchmark - public void windowFromCurrentRow(Blackhole blackhole) - { - String sql = "SELECT COUNT(*) " - + "OVER (PARTITION BY dimUniform ORDER BY dimSequential RANGE UNBOUNDED FOLLOWING) " - + "FROM foo"; - querySql(sql, blackhole); - } - - @Benchmark - public void windowWithSorter(Blackhole blackhole) - { - String sql = "SELECT COUNT(*) " - + "OVER (PARTITION BY dimUniform ORDER BY dimSequential) " - + "FROM foo " - + "GROUP BY dimSequential, dimUniform"; - querySql(sql, blackhole); - } - - @Benchmark - public void windowWithoutSorter(Blackhole blackhole) - { - String sql = "SELECT COUNT(*) " - + "OVER (PARTITION BY dimUniform ORDER BY dimSequential) " - + "FROM foo " - + "GROUP BY dimUniform, dimSequential"; - querySql(sql, blackhole); - } - - @Benchmark - public void 
windowWithGroupbyTime(Blackhole blackhole) - { - String sql = "SELECT " - + "SUM(dimSequentialHalfNull) + SUM(dimHyperUnique), " - + "LAG(SUM(dimSequentialHalfNull + dimHyperUnique)) OVER (PARTITION BY dimUniform ORDER BY dimSequential) " - + "FROM foo " - + "GROUP BY __time, dimUniform, dimSequential"; - querySql(sql, blackhole); } } diff --git a/processing/src/main/java/org/apache/druid/segment/generator/GeneratorSchemaInfo.java b/processing/src/main/java/org/apache/druid/segment/generator/GeneratorSchemaInfo.java index 603e7dd0d82..74aeea727cf 100644 --- a/processing/src/main/java/org/apache/druid/segment/generator/GeneratorSchemaInfo.java +++ b/processing/src/main/java/org/apache/druid/segment/generator/GeneratorSchemaInfo.java @@ -26,7 +26,9 @@ import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.LinearShardSpec; import org.joda.time.Interval; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; public class GeneratorSchemaInfo @@ -64,6 +66,20 @@ public class GeneratorSchemaInfo return new DimensionsSpec(specs); } + public DimensionsSpec getDimensionsSpecExcludeAggs() + { + final Set metricsInputs = new HashSet<>(); + for (AggregatorFactory agg : aggs) { + metricsInputs.addAll(agg.requiredFields()); + } + List specs = getColumnSchemas().stream() + .filter(x -> !x.isMetric() && !metricsInputs.contains(x.getName())) + .map(GeneratorColumnSchema::getDimensionSchema) + .collect(Collectors.toList()); + + return new DimensionsSpec(specs); + } + public List getAggs() { return aggs; diff --git a/processing/src/test/java/org/apache/druid/segment/TestHelper.java b/processing/src/test/java/org/apache/druid/segment/TestHelper.java index 8e0728730be..ef9a9111216 100644 --- a/processing/src/test/java/org/apache/druid/segment/TestHelper.java +++ b/processing/src/test/java/org/apache/druid/segment/TestHelper.java @@ -67,6 +67,11 @@ public class TestHelper return new IndexMergerV9(JSON_MAPPER, getTestIndexIO(), segmentWriteOutMediumFactory, true); } + public static IndexMergerV9 getTestIndexMergerV9(ObjectMapper jsonMapper, SegmentWriteOutMediumFactory segmentWriteOutMediumFactory) + { + return new IndexMergerV9(jsonMapper, getTestIndexIO(jsonMapper), segmentWriteOutMediumFactory, true); + } + public static IndexMergerV9 getTestIndexMergerV9(SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, ColumnConfig columnConfig) { return new IndexMergerV9(JSON_MAPPER, getTestIndexIO(columnConfig), segmentWriteOutMediumFactory, true); @@ -82,6 +87,16 @@ public class TestHelper return new IndexIO(JSON_MAPPER, columnConfig); } + public static IndexIO getTestIndexIO(ObjectMapper jsonMapper, ColumnConfig columnConfig) + { + return new IndexIO(jsonMapper, columnConfig); + } + + public static IndexIO getTestIndexIO(ObjectMapper jsonMapper) + { + return new IndexIO(jsonMapper, ColumnConfig.SELECTION_SIZE); + } + public static AnnotationIntrospector makeAnnotationIntrospector() { // Prepare annotationIntrospector with similar logic, except skip Guice loading diff --git a/processing/src/test/java/org/apache/druid/segment/generator/SegmentGenerator.java b/processing/src/test/java/org/apache/druid/segment/generator/SegmentGenerator.java index 5afb9fa0412..7f04463f73e 100644 --- a/processing/src/test/java/org/apache/druid/segment/generator/SegmentGenerator.java +++ b/processing/src/test/java/org/apache/druid/segment/generator/SegmentGenerator.java @@ -19,10 +19,12 @@ package org.apache.druid.segment.generator; +import 
com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.hash.Hashing; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.InputRowSchema; +import org.apache.druid.data.input.impl.AggregateProjectionSpec; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.MapInputRowParser; import org.apache.druid.data.input.impl.TimestampSpec; @@ -55,6 +57,7 @@ import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -114,7 +117,16 @@ public class SegmentGenerator implements Closeable final int numRows ) { - return generate(dataSegment, schemaInfo, schemaInfo.getDimensionsSpec(), TransformSpec.NONE, IndexSpec.DEFAULT, granularity, numRows); + return generate( + dataSegment, + schemaInfo, + schemaInfo.getDimensionsSpec(), + TransformSpec.NONE, + IndexSpec.DEFAULT, + granularity, + Collections.emptyList(), + numRows + ); } public QueryableIndex generate( @@ -125,7 +137,16 @@ public class SegmentGenerator implements Closeable final int numRows ) { - return generate(dataSegment, schemaInfo, schemaInfo.getDimensionsSpec(), TransformSpec.NONE, indexSpec, granularity, numRows); + return generate( + dataSegment, + schemaInfo, + schemaInfo.getDimensionsSpec(), + TransformSpec.NONE, + indexSpec, + granularity, + Collections.emptyList(), + numRows + ); } public QueryableIndex generate( @@ -137,6 +158,53 @@ public class SegmentGenerator implements Closeable final Granularity queryGranularity, final int numRows ) + { + return generate( + dataSegment, + schemaInfo, + dimensionsSpec, + transformSpec, + indexSpec, + queryGranularity, + Collections.emptyList(), + numRows + ); + } + public QueryableIndex generate( + final DataSegment dataSegment, + final GeneratorSchemaInfo schemaInfo, + final DimensionsSpec dimensionsSpec, + final TransformSpec transformSpec, + final IndexSpec indexSpec, + final Granularity queryGranularity, + final List projectionSpecs, + final int numRows + ) + { + return generate( + dataSegment, + schemaInfo, + dimensionsSpec, + transformSpec, + indexSpec, + queryGranularity, + projectionSpecs, + numRows, + TestHelper.JSON_MAPPER + ); + } + + public QueryableIndex generate( + final DataSegment dataSegment, + final GeneratorSchemaInfo schemaInfo, + final DimensionsSpec dimensionsSpec, + final TransformSpec transformSpec, + final IndexSpec indexSpec, + final Granularity queryGranularity, + final List projectionSpecs, + final int numRows, + final ObjectMapper jsonMapper + ) { // In case we need to generate hyperUniques or json ComplexMetrics.registerSerde(HyperUniquesSerde.TYPE_NAME, new HyperUniquesSerde()); @@ -149,6 +217,8 @@ public class SegmentGenerator implements Closeable .putString(dimensionsSpec.toString(), StandardCharsets.UTF_8) .putString(queryGranularity.toString(), StandardCharsets.UTF_8) .putString(indexSpec.toString(), StandardCharsets.UTF_8) + .putString(transformSpec.toString(), StandardCharsets.UTF_8) + .putString(projectionSpecs.toString(), StandardCharsets.UTF_8) .putInt(numRows) .hash() .toString(); @@ -158,7 +228,7 @@ public class SegmentGenerator implements Closeable if (outDir.exists()) { try { log.info("Found segment with hash[%s] cached in directory[%s].", dataHash, outDir); - return TestHelper.getTestIndexIO(ColumnConfig.DEFAULT).loadIndex(outDir); + return TestHelper.getTestIndexIO(jsonMapper, 
ColumnConfig.DEFAULT).loadIndex(outDir); } catch (IOException e) { throw new RuntimeException(e); @@ -179,22 +249,23 @@ public class SegmentGenerator implements Closeable .withMetrics(schemaInfo.getAggsArray()) .withRollup(schemaInfo.isWithRollup()) .withQueryGranularity(queryGranularity) + .withProjections(projectionSpecs) .build(); final List rows = new ArrayList<>(); final List indexes = new ArrayList<>(); - Transformer transformer = transformSpec.toTransformer(); - InputRowSchema rowSchema = new InputRowSchema( + final Transformer transformer = transformSpec.toTransformer(); + final InputRowSchema rowSchema = new InputRowSchema( new TimestampSpec(null, null, null), dimensionsSpec, null ); for (int i = 0; i < numRows; i++) { - Map raw = dataGenerator.nextRaw(); - InputRow inputRow = MapInputRowParser.parse(rowSchema, raw); - InputRow transformedRow = transformer.transform(inputRow); + final Map raw = dataGenerator.nextRaw(); + final InputRow inputRow = MapInputRowParser.parse(rowSchema, raw); + final InputRow transformedRow = transformer.transform(inputRow); rows.add(transformedRow); if ((i + 1) % 20000 == 0) { @@ -202,7 +273,7 @@ public class SegmentGenerator implements Closeable } if (rows.size() % MAX_ROWS_IN_MEMORY == 0) { - indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema, indexSpec)); + indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema, indexSpec, jsonMapper)); rows.clear(); } } @@ -210,7 +281,7 @@ public class SegmentGenerator implements Closeable log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment); if (rows.size() > 0) { - indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema, indexSpec)); + indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema, indexSpec, jsonMapper)); rows.clear(); } @@ -221,9 +292,9 @@ public class SegmentGenerator implements Closeable } else { try { retVal = TestHelper - .getTestIndexIO(ColumnConfig.DEFAULT) + .getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT) .loadIndex( - TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + TestHelper.getTestIndexMergerV9(jsonMapper, OffHeapMemorySegmentWriteOutMediumFactory.instance()) .mergeQueryableIndex( indexes, false, @@ -256,7 +327,82 @@ public class SegmentGenerator implements Closeable } public IncrementalIndex generateIncrementalIndex( + final DataSegment dataSegment, + final GeneratorSchemaInfo schemaInfo, + final DimensionsSpec dimensionsSpec, + final TransformSpec transformSpec, + final AggregatorFactory[] aggregatorFactories, + final IndexSpec indexSpec, + final Granularity queryGranularity, + final List projectionSpecs, + final int numRows, + final ObjectMapper jsonMapper + ) + { + // In case we need to generate hyperUniques or json + ComplexMetrics.registerSerde(HyperUniquesSerde.TYPE_NAME, new HyperUniquesSerde()); + BuiltInTypesModule.registerHandlersAndSerde(); + final String dataHash = Hashing.sha256() + .newHasher() + .putString(dataSegment.getId().toString(), StandardCharsets.UTF_8) + .putString(schemaInfo.toString(), StandardCharsets.UTF_8) + .putString(dimensionsSpec.toString(), StandardCharsets.UTF_8) + .putString(queryGranularity.toString(), StandardCharsets.UTF_8) + .putString(indexSpec.toString(), StandardCharsets.UTF_8) + .putInt(numRows) + .hash() + .toString(); + + log.info("Writing segment with hash[%s] to incremental index.", dataHash); + + final DataGenerator dataGenerator = new DataGenerator( + 
schemaInfo.getColumnSchemas(), + dataSegment.getId().hashCode(), /* Use segment identifier hashCode as seed */ + schemaInfo.getDataInterval(), + numRows + ); + + final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder() + .withDimensionsSpec(dimensionsSpec) + .withMetrics(aggregatorFactories) + .withRollup(schemaInfo.isWithRollup()) + .withQueryGranularity(queryGranularity) + .withProjections(projectionSpecs) + .build(); + + final List rows = new ArrayList<>(); + + final Transformer transformer = transformSpec.toTransformer(); + final InputRowSchema rowSchema = new InputRowSchema( + new TimestampSpec(null, null, null), + dimensionsSpec, + null + ); + + for (int i = 0; i < numRows; i++) { + final Map raw = dataGenerator.nextRaw(); + final InputRow inputRow = MapInputRowParser.parse(rowSchema, raw); + final InputRow transformedRow = transformer.transform(inputRow); + rows.add(transformedRow); + + if ((i + 1) % 20000 == 0) { + log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment); + } + } + + log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment); + + return IndexBuilder + .create(jsonMapper) + .schema(indexSchema) + .tmpDir(new File(getSegmentDir(dataSegment.getId(), dataHash), "")) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .rows(rows) + .buildIncrementalIndex(); + } + + public IncrementalIndex generateIncrementalIndex( final DataSegment dataSegment, final GeneratorSchemaInfo schemaInfo, final Granularity granularity, @@ -320,11 +466,12 @@ public class SegmentGenerator implements Closeable final int indexNumber, final List rows, final IncrementalIndexSchema indexSchema, - final IndexSpec indexSpec + final IndexSpec indexSpec, + final ObjectMapper jsonMapper ) { return IndexBuilder - .create() + .create(jsonMapper) .schema(indexSchema) .indexSpec(indexSpec) .tmpDir(new File(getSegmentDir(identifier, dataHash), String.valueOf(indexNumber)))