2015-05-11 01:02:54 -04:00
|
|
|
<?xml version="1.0" encoding="UTF-8"?>
|
|
|
|
<!--
|
2018-07-11 12:55:18 -04:00
|
|
|
~ Licensed to the Apache Software Foundation (ASF) under one
|
|
|
|
~ or more contributor license agreements. See the NOTICE file
|
|
|
|
~ distributed with this work for additional information
|
|
|
|
~ regarding copyright ownership. The ASF licenses this file
|
|
|
|
~ to you under the Apache License, Version 2.0 (the
|
|
|
|
~ "License"); you may not use this file except in compliance
|
|
|
|
~ with the License. You may obtain a copy of the License at
|
|
|
|
~
|
|
|
|
~ http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
~
|
|
|
|
~ Unless required by applicable law or agreed to in writing,
|
|
|
|
~ software distributed under the License is distributed on an
|
|
|
|
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
|
|
~ KIND, either express or implied. See the License for the
|
|
|
|
~ specific language governing permissions and limitations
|
|
|
|
~ under the License.
|
|
|
|
-->
|
2015-05-11 01:02:54 -04:00
|
|
|
|
2015-06-18 20:35:14 -04:00
|
|
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
2015-05-11 01:02:54 -04:00
|
|
|
<modelVersion>4.0.0</modelVersion>
|
|
|
|
|
|
|
|
<artifactId>druid-benchmarks</artifactId>
|
|
|
|
<name>druid-benchmarks</name>
|
|
|
|
<packaging>jar</packaging>
|
|
|
|
<!-- Parent POM: org.apache.druid:druid. No groupId or version is declared at the top level of this file, so both are inherited from this parent. -->
<parent>
|
2018-08-30 12:56:26 -04:00
|
|
|
<groupId>org.apache.druid</groupId>
|
2015-05-11 01:02:54 -04:00
|
|
|
<artifactId>druid</artifactId>
|
2023-10-11 01:03:45 -04:00
|
|
|
<version>29.0.0-SNAPSHOT</version>
|
2015-05-11 01:02:54 -04:00
|
|
|
</parent>
|
|
|
|
|
|
|
|
<dependencies>
|
|
|
|
<!-- JMH core library. Version comes from the jmh.version property defined in this file; scope is test, so it is only on the test classpath. -->
<dependency>
|
|
|
|
<groupId>org.openjdk.jmh</groupId>
|
|
|
|
<artifactId>jmh-core</artifactId>
|
|
|
|
<version>${jmh.version}</version>
|
2020-02-13 17:58:32 -05:00
|
|
|
<scope>test</scope>
|
2015-05-11 01:02:54 -04:00
|
|
|
</dependency>
|
2022-05-12 01:06:20 -04:00
|
|
|
<dependency>
|
|
|
|
<groupId>commons-net</groupId>
|
|
|
|
<artifactId>commons-net</artifactId>
|
|
|
|
</dependency>
|
2015-05-11 01:02:54 -04:00
|
|
|
<!-- JMH annotation processor, same jmh.version as jmh-core. Scope is provided: available when compiling, not propagated to consumers of this artifact. -->
<dependency>
|
|
|
|
<groupId>org.openjdk.jmh</groupId>
|
|
|
|
<artifactId>jmh-generator-annprocess</artifactId>
|
|
|
|
<version>${jmh.version}</version>
|
|
|
|
<scope>provided</scope>
|
|
|
|
</dependency>
|
2020-09-23 16:56:38 -04:00
|
|
|
<dependency>
|
|
|
|
<groupId>org.easymock</groupId>
|
|
|
|
<artifactId>easymock</artifactId>
|
2024-01-12 03:06:31 -05:00
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.google.inject.extensions</groupId>
|
|
|
|
<artifactId>guice-multibindings</artifactId>
|
2020-09-23 16:56:38 -04:00
|
|
|
</dependency>
|
2015-05-11 01:02:54 -04:00
|
|
|
<dependency>
|
2018-08-30 12:56:26 -04:00
|
|
|
<groupId>org.apache.druid</groupId>
|
2015-05-11 01:02:54 -04:00
|
|
|
<artifactId>druid-processing</artifactId>
|
|
|
|
<version>${project.parent.version}</version>
|
|
|
|
</dependency>
|
2016-05-17 12:56:00 -04:00
|
|
|
<dependency>
|
2018-08-30 12:56:26 -04:00
|
|
|
<groupId>org.apache.druid</groupId>
|
2016-05-17 12:56:00 -04:00
|
|
|
<artifactId>druid-server</artifactId>
|
|
|
|
<version>${project.parent.version}</version>
|
|
|
|
</dependency>
|
2016-12-16 20:15:59 -05:00
|
|
|
<dependency>
|
2018-08-30 12:56:26 -04:00
|
|
|
<groupId>org.apache.druid</groupId>
|
2016-12-16 20:15:59 -05:00
|
|
|
<artifactId>druid-sql</artifactId>
|
|
|
|
<version>${project.parent.version}</version>
|
|
|
|
</dependency>
|
2019-01-17 17:51:16 -05:00
|
|
|
<dependency>
|
2019-07-29 14:42:16 -04:00
|
|
|
<groupId>org.apache.druid.extensions</groupId>
|
|
|
|
<artifactId>druid-datasketches</artifactId>
|
|
|
|
<version>${project.parent.version}</version>
|
|
|
|
</dependency>
|
2019-01-17 17:51:16 -05:00
|
|
|
<dependency>
|
|
|
|
<groupId>org.apache.druid.extensions</groupId>
|
|
|
|
<artifactId>druid-histogram</artifactId>
|
|
|
|
<version>${project.parent.version}</version>
|
Query vectorization. (#6794)
* Benchmarks: New SqlBenchmark, add caching & vectorization to some others.
- Introduce a new SqlBenchmark geared towards benchmarking a wide
variety of SQL queries. Rename the old SqlBenchmark to
SqlVsNativeBenchmark.
- Add (optional) caching to SegmentGenerator to enable easier
benchmarking of larger segments.
- Add vectorization to FilteredAggregatorBenchmark and GroupByBenchmark.
* Query vectorization.
This patch includes vectorized timeseries and groupBy engines, as well
as some analogs of your favorite Druid classes:
- VectorCursor is like Cursor. (It comes from StorageAdapter.makeVectorCursor.)
- VectorColumnSelectorFactory is like ColumnSelectorFactory, and it has
methods to create analogs of the column selectors you know and love.
- VectorOffset and ReadableVectorOffset are like Offset and ReadableOffset.
- VectorAggregator is like BufferAggregator.
- VectorValueMatcher is like ValueMatcher.
There are some noticeable differences between vectorized and regular
execution:
- Unlike regular cursors, vector cursors do not understand time
granularity. They expect query engines to handle this on their own,
which a new VectorCursorGranularizer class helps with. This is to
avoid too much batch-splitting and to respect the fact that vector
selectors are somewhat more heavyweight than regular selectors.
- Unlike FilteredOffset, FilteredVectorOffset does not leverage indexes
for filters that might partially support them (like an OR of one
filter that supports indexing and another that doesn't). I'm not sure
that this behavior is desirable anyway (it is potentially too eager)
but, at any rate, it'd be better to harmonize it between the two
classes. Potentially they should both do some different thing that
is smarter than what either of them is doing right now.
- When vector cursors are created by QueryableIndexCursorSequenceBuilder,
they use a morphing binary-then-linear search to find their start and
end rows, rather than linear search.
Limitations in this patch are:
- Only timeseries and groupBy have vectorized engines.
- GroupBy doesn't handle multi-value dimensions yet.
- Vector cursors cannot handle virtual columns or descending order.
- Only some filters have vectorized matchers: "selector", "bound", "in",
"like", "regex", "search", "and", "or", and "not".
- Only some aggregators have vectorized implementations: "count",
"doubleSum", "floatSum", "longSum", "hyperUnique", and "filtered".
- Dimension specs other than "default" don't work yet (no extraction
functions or filtered dimension specs).
Currently, the testing strategy includes adding vectorization-enabled
tests to TimeseriesQueryRunnerTest, GroupByQueryRunnerTest,
GroupByTimeseriesQueryRunnerTest, CalciteQueryTest, and all of the
filtering tests that extend BaseFilterTest. In all of those classes,
there are some test cases that don't support vectorization. They are
marked by special function calls like "cannotVectorize" or "skipVectorize"
that tell the test harness to either expect an exception or to skip the
test case.
Testing should be expanded in the future -- a project in and of itself.
Related to #3011.
* WIP
* Adjustments for unused things.
* Adjust javadocs.
* DimensionDictionarySelector adjustments.
* Add "clone" to BatchIteratorAdapter.
* ValueMatcher javadocs.
* Fix benchmark.
* Fixups post-merge.
* Expect exception on testGroupByWithStringVirtualColumn for IncrementalIndex.
* BloomDimFilterSqlTest: Tag two non-vectorizable tests.
* Minor adjustments.
* Update surefire, bump up Xmx in Travis.
* Some more adjustments.
* Javadoc adjustments
* AggregatorAdapters adjustments.
* Additional comments.
* Remove switching search.
* Only missiles.
2019-07-12 15:54:07 -04:00
|
|
|
</dependency>
|
2020-09-17 18:05:40 -04:00
|
|
|
<dependency>
|
|
|
|
<groupId>org.apache.druid.extensions</groupId>
|
|
|
|
<artifactId>druid-stats</artifactId>
|
|
|
|
<version>${project.parent.version}</version>
|
|
|
|
</dependency>
|
2016-12-16 20:15:59 -05:00
|
|
|
<!-- test-jar artifact of druid-processing. This is in addition to the regular druid-processing jar declared earlier in this dependency list. -->
<dependency>
|
2018-08-30 12:56:26 -04:00
|
|
|
<groupId>org.apache.druid</groupId>
|
2016-12-16 20:15:59 -05:00
|
|
|
<artifactId>druid-processing</artifactId>
|
|
|
|
<version>${project.parent.version}</version>
|
|
|
|
<type>test-jar</type>
|
|
|
|
</dependency>
|
Query vectorization. (#6794)
* Benchmarks: New SqlBenchmark, add caching & vectorization to some others.
- Introduce a new SqlBenchmark geared towards benchmarking a wide
variety of SQL queries. Rename the old SqlBenchmark to
SqlVsNativeBenchmark.
- Add (optional) caching to SegmentGenerator to enable easier
benchmarking of larger segments.
- Add vectorization to FilteredAggregatorBenchmark and GroupByBenchmark.
* Query vectorization.
This patch includes vectorized timeseries and groupBy engines, as well
as some analogs of your favorite Druid classes:
- VectorCursor is like Cursor. (It comes from StorageAdapter.makeVectorCursor.)
- VectorColumnSelectorFactory is like ColumnSelectorFactory, and it has
methods to create analogs of the column selectors you know and love.
- VectorOffset and ReadableVectorOffset are like Offset and ReadableOffset.
- VectorAggregator is like BufferAggregator.
- VectorValueMatcher is like ValueMatcher.
There are some noticeable differences between vectorized and regular
execution:
- Unlike regular cursors, vector cursors do not understand time
granularity. They expect query engines to handle this on their own,
which a new VectorCursorGranularizer class helps with. This is to
avoid too much batch-splitting and to respect the fact that vector
selectors are somewhat more heavyweight than regular selectors.
- Unlike FilteredOffset, FilteredVectorOffset does not leverage indexes
for filters that might partially support them (like an OR of one
filter that supports indexing and another that doesn't). I'm not sure
that this behavior is desirable anyway (it is potentially too eager)
but, at any rate, it'd be better to harmonize it between the two
classes. Potentially they should both do some different thing that
is smarter than what either of them is doing right now.
- When vector cursors are created by QueryableIndexCursorSequenceBuilder,
they use a morphing binary-then-linear search to find their start and
end rows, rather than linear search.
Limitations in this patch are:
- Only timeseries and groupBy have vectorized engines.
- GroupBy doesn't handle multi-value dimensions yet.
- Vector cursors cannot handle virtual columns or descending order.
- Only some filters have vectorized matchers: "selector", "bound", "in",
"like", "regex", "search", "and", "or", and "not".
- Only some aggregators have vectorized implementations: "count",
"doubleSum", "floatSum", "longSum", "hyperUnique", and "filtered".
- Dimension specs other than "default" don't work yet (no extraction
functions or filtered dimension specs).
Currently, the testing strategy includes adding vectorization-enabled
tests to TimeseriesQueryRunnerTest, GroupByQueryRunnerTest,
GroupByTimeseriesQueryRunnerTest, CalciteQueryTest, and all of the
filtering tests that extend BaseFilterTest. In all of those classes,
there are some test cases that don't support vectorization. They are
marked by special function calls like "cannotVectorize" or "skipVectorize"
that tell the test harness to either expect an exception or to skip the
test case.
Testing should be expanded in the future -- a project in and of itself.
Related to #3011.
* WIP
* Adjustments for unused things.
* Adjust javadocs.
* DimensionDictionarySelector adjustments.
* Add "clone" to BatchIteratorAdapter.
* ValueMatcher javadocs.
* Fix benchmark.
* Fixups post-merge.
* Expect exception on testGroupByWithStringVirtualColumn for IncrementalIndex.
* BloomDimFilterSqlTest: Tag two non-vectorizable tests.
* Minor adjustments.
* Update surefire, bump up Xmx in Travis.
* Some more adjustments.
* Javadoc adjustments
* AggregatorAdapters adjustments.
* Additional comments.
* Remove switching search.
* Only missiles.
2019-07-12 15:54:07 -04:00
|
|
|
<!-- test-jar artifact of druid-server. This is in addition to the regular druid-server jar declared earlier in this dependency list. -->
<dependency>
|
|
|
|
<groupId>org.apache.druid</groupId>
|
|
|
|
<artifactId>druid-server</artifactId>
|
|
|
|
<version>${project.parent.version}</version>
|
|
|
|
<type>test-jar</type>
|
|
|
|
</dependency>
|
2016-12-16 20:15:59 -05:00
|
|
|
<!-- test-jar artifact of druid-sql. This is in addition to the regular druid-sql jar declared earlier in this dependency list. -->
<dependency>
|
2018-08-30 12:56:26 -04:00
|
|
|
<groupId>org.apache.druid</groupId>
|
2016-12-16 20:15:59 -05:00
|
|
|
<artifactId>druid-sql</artifactId>
|
|
|
|
<version>${project.parent.version}</version>
|
|
|
|
<type>test-jar</type>
|
|
|
|
</dependency>
|
2020-02-04 14:33:48 -05:00
|
|
|
<!-- Calcite core with provided scope. No version is given here; presumably it is managed by the parent POM's dependencyManagement (TODO confirm). -->
<dependency>
|
|
|
|
<groupId>org.apache.calcite</groupId>
|
|
|
|
<artifactId>calcite-core</artifactId>
|
|
|
|
<scope>provided</scope>
|
|
|
|
</dependency>
|
2015-12-09 18:35:26 -05:00
|
|
|
<!-- json-flattener is pinned to an explicit version in this module, unlike most dependencies in this list, which declare no version. -->
<dependency>
|
|
|
|
<groupId>com.github.wnameless</groupId>
|
|
|
|
<artifactId>json-flattener</artifactId>
|
|
|
|
<version>0.1.0</version>
|
|
|
|
</dependency>
|
2019-11-06 14:07:04 -05:00
|
|
|
<dependency>
|
|
|
|
<groupId>com.fasterxml.jackson.core</groupId>
|
|
|
|
<artifactId>jackson-core</artifactId>
|
|
|
|
</dependency>
|
2017-06-06 00:29:08 -04:00
|
|
|
<dependency>
|
2019-09-09 17:37:21 -04:00
|
|
|
<groupId>com.fasterxml.jackson.dataformat</groupId>
|
|
|
|
<artifactId>jackson-dataformat-smile</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.apache.commons</groupId>
|
|
|
|
<artifactId>commons-math3</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.fasterxml.jackson.core</groupId>
|
|
|
|
<artifactId>jackson-annotations</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>joda-time</groupId>
|
|
|
|
<artifactId>joda-time</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.fasterxml.jackson.core</groupId>
|
|
|
|
<artifactId>jackson-databind</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>it.unimi.dsi</groupId>
|
2021-06-10 10:43:18 -04:00
|
|
|
<artifactId>fastutil-core</artifactId>
|
2019-09-09 17:37:21 -04:00
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.roaringbitmap</groupId>
|
|
|
|
<artifactId>RoaringBitmap</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.google.guava</groupId>
|
|
|
|
<artifactId>guava</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.google.code.findbugs</groupId>
|
|
|
|
<artifactId>jsr305</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
2019-11-25 22:45:51 -05:00
|
|
|
<groupId>org.apache.datasketches</groupId>
|
|
|
|
<artifactId>datasketches-java</artifactId>
|
2020-02-04 22:58:00 -05:00
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.apache.datasketches</groupId>
|
|
|
|
<artifactId>datasketches-memory</artifactId>
|
2017-06-06 00:29:08 -04:00
|
|
|
</dependency>
|
2021-08-31 17:56:37 -04:00
|
|
|
<dependency>
|
|
|
|
<groupId>com.google.inject</groupId>
|
|
|
|
<artifactId>guice</artifactId>
|
|
|
|
</dependency>
|
2022-05-12 01:06:20 -04:00
|
|
|
<!-- ipaddress library, pinned to an explicit version in this module rather than left to version management. -->
<dependency>
|
|
|
|
<groupId>com.github.seancfoley</groupId>
|
|
|
|
<artifactId>ipaddress</artifactId>
|
|
|
|
<version>5.3.4</version>
|
|
|
|
</dependency>
|
2015-12-09 18:35:26 -05:00
|
|
|
<dependency>
|
|
|
|
<groupId>junit</groupId>
|
|
|
|
<artifactId>junit</artifactId>
|
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
2020-06-24 21:01:31 -04:00
|
|
|
<!-- Druid protobuf extension at the parent project's version; test scope only. -->
<dependency>
|
|
|
|
<groupId>org.apache.druid.extensions</groupId>
|
|
|
|
<artifactId>druid-protobuf-extensions</artifactId>
|
2020-09-17 18:05:40 -04:00
|
|
|
<version>${project.parent.version}</version>
|
2020-06-24 21:01:31 -04:00
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
2015-05-11 01:02:54 -04:00
|
|
|
</dependencies>
|
|
|
|
|
|
|
|
<!-- Module-level build properties. -->
<properties>
|
|
|
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
2019-06-17 07:27:09 -04:00
|
|
|
<!-- Shared by the jmh-core and jmh-generator-annprocess dependencies in this file. -->
<jmh.version>1.21</jmh.version>
|
2017-06-06 00:29:08 -04:00
|
|
|
<!-- Not referenced anywhere in this file; presumably read by compiler configuration in the parent POM (TODO confirm). -->
<javac.target>1.8</javac.target>
|
2015-05-11 01:02:54 -04:00
|
|
|
<!-- Used as the finalName of the maven-assembly-plugin output in this file. -->
<uberjar.name>benchmarks</uberjar.name>
|
|
|
|
</properties>
|
|
|
|
|
|
|
|
<build>
|
|
|
|
<plugins>
|
|
|
|
<!-- Runs the assembly plugin's single goal in the package phase, using the assembly.xml descriptor. -->
<plugin>
|
|
|
|
<groupId>org.apache.maven.plugins</groupId>
|
2020-02-13 17:58:32 -05:00
|
|
|
<artifactId>maven-assembly-plugin</artifactId>
|
2022-05-02 12:43:19 -04:00
|
|
|
<version>3.1.0</version>
|
2015-05-11 01:02:54 -04:00
|
|
|
<executions>
|
|
|
|
<execution>
|
|
|
|
<phase>package</phase>
|
|
|
|
<goals>
|
2020-02-13 17:58:32 -05:00
|
|
|
<goal>single</goal>
|
2015-05-11 01:02:54 -04:00
|
|
|
</goals>
|
|
|
|
<configuration>
|
|
|
|
<!-- Output base name comes from the uberjar.name property; appendAssemblyId=false keeps the assembly id out of the file name. -->
<finalName>${uberjar.name}</finalName>
|
2020-02-13 17:58:32 -05:00
|
|
|
<appendAssemblyId>false</appendAssemblyId>
|
2022-05-02 12:43:19 -04:00
|
|
|
<descriptors>
|
|
|
|
<descriptor>assembly.xml</descriptor>
|
|
|
|
</descriptors>
|
2020-02-13 17:58:32 -05:00
|
|
|
<archive>
|
|
|
|
<manifest>
|
2015-05-11 01:02:54 -04:00
|
|
|
<!-- Sets the manifest main class to the JMH launcher. -->
<mainClass>org.openjdk.jmh.Main</mainClass>
|
2020-02-13 17:58:32 -05:00
|
|
|
</manifest>
|
|
|
|
</archive>
|
2015-05-11 01:02:54 -04:00
|
|
|
</configuration>
|
|
|
|
</execution>
|
|
|
|
</executions>
|
|
|
|
</plugin>
|
2019-12-05 17:34:35 -05:00
|
|
|
<!-- OWASP dependency-check is skipped for this module. No plugin version is given here; presumably it is managed by the parent POM (TODO confirm). -->
<plugin>
|
|
|
|
<groupId>org.owasp</groupId>
|
|
|
|
<artifactId>dependency-check-maven</artifactId>
|
|
|
|
<configuration>
|
|
|
|
<skip>true</skip>
|
|
|
|
</configuration>
|
|
|
|
</plugin>
|
2020-05-20 12:31:37 -04:00
|
|
|
<!-- JaCoCo is skipped for this module. No plugin version is given here; presumably it is managed by the parent POM (TODO confirm). -->
<plugin>
|
|
|
|
<groupId>org.jacoco</groupId>
|
|
|
|
<artifactId>jacoco-maven-plugin</artifactId>
|
|
|
|
<configuration>
|
|
|
|
<skip>true</skip> <!-- ignore non-production code -->
|
|
|
|
</configuration>
|
|
|
|
</plugin>
|
2015-05-11 01:02:54 -04:00
|
|
|
</plugins>
|
|
|
|
<!-- Version pins for standard Maven plugins. The entries omit groupId, which Maven defaults to org.apache.maven.plugins. -->
<pluginManagement>
|
|
|
|
<plugins>
|
|
|
|
<plugin>
|
|
|
|
<artifactId>maven-clean-plugin</artifactId>
|
|
|
|
<version>2.5</version>
|
|
|
|
</plugin>
|
|
|
|
<plugin>
|
|
|
|
<artifactId>maven-deploy-plugin</artifactId>
|
|
|
|
<version>2.8.1</version>
|
|
|
|
</plugin>
|
|
|
|
<plugin>
|
|
|
|
<artifactId>maven-install-plugin</artifactId>
|
|
|
|
<version>2.5.1</version>
|
|
|
|
</plugin>
|
|
|
|
<plugin>
|
|
|
|
<artifactId>maven-jar-plugin</artifactId>
|
|
|
|
<version>2.4</version>
|
|
|
|
</plugin>
|
|
|
|
<plugin>
|
|
|
|
<artifactId>maven-javadoc-plugin</artifactId>
|
|
|
|
<version>2.9.1</version>
|
|
|
|
</plugin>
|
|
|
|
<plugin>
|
|
|
|
<artifactId>maven-resources-plugin</artifactId>
|
|
2021-08-02 12:38:34 -04:00
|
|
|
<version>3.2.0</version>
|
2015-05-11 01:02:54 -04:00
|
|
|
</plugin>
|
|
|
|
<plugin>
|
|
|
|
<artifactId>maven-site-plugin</artifactId>
|
|
2022-03-17 03:17:29 -04:00
|
|
|
<version>3.11.0</version>
|
2015-05-11 01:02:54 -04:00
|
|
|
</plugin>
|
|
|
|
<plugin>
|
|
|
|
<artifactId>maven-source-plugin</artifactId>
|
|
2023-08-15 10:39:19 -04:00
|
|
|
<version>3.3.0</version>
|
2015-05-11 01:02:54 -04:00
|
|
|
</plugin>
|
|
|
|
<!-- NOTE(review): the only entry here with no version; presumably the surefire version comes from the parent POM (confirm). -->
<plugin>
|
|
|
|
<artifactId>maven-surefire-plugin</artifactId>
|
|
|
|
</plugin>
|
|
|
|
</plugins>
|
|
|
|
</pluginManagement>
|
|
|
|
</build>
|
|
|
|
|
|
|
|
</project>
|