Merge branch 'rename-d1-dbl1' into unnest-relfieldtrimmer-unnestfieldtype

Zoltan Haindrich 2024-12-03 03:45:04 +00:00
commit e67b609eb6
372 changed files with 8855 additions and 3529 deletions

View File

@ -40,7 +40,7 @@ jobs:
- uses: actions/setup-java@v4
with:
distribution: 'zulu'
java-version: '8'
java-version: '17'
cache: 'maven'
# Initializes the CodeQL tools for scanning.

View File

@ -28,7 +28,7 @@ on:
jobs:
build:
if: (github.event_name == 'schedule' && github.repository == 'apache/druid')
name: build (jdk8)
name: build (jdk17)
runs-on: ubuntu-latest
steps:
- name: Checkout branch
@ -37,7 +37,7 @@ jobs:
- name: setup java
uses: actions/setup-java@v4
with:
java-version: '8'
java-version: '17'
distribution: 'zulu'
- name: Cache Maven m2 repository
@ -60,8 +60,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
needs: build
with:
build_jdk: 8
runtime_jdk: 11
build_jdk: 17
runtime_jdk: 21.0.4
testing_groups: -Dgroups=${{ matrix.testing_group }}
use_indexer: middleManager
group: ${{ matrix.testing_group }}
@ -74,8 +74,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
needs: build
with:
build_jdk: 8
runtime_jdk: 11
build_jdk: 17
runtime_jdk: 21.0.4
testing_groups: -Dgroups=${{ matrix.testing_group }}
use_indexer: indexer
group: ${{ matrix.testing_group }}
@ -88,8 +88,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
needs: build
with:
build_jdk: 8
runtime_jdk: 11
build_jdk: 17
runtime_jdk: 21.0.4
testing_groups: -Dgroups=${{ matrix.testing_group }}
use_indexer: middleManager
override_config_path: ./environment-configs/test-groups/prepopulated-data
@ -103,8 +103,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
needs: build
with:
build_jdk: 8
runtime_jdk: 11
build_jdk: 17
runtime_jdk: 21.0.4
testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties
use_indexer: ${{ matrix.indexer }}
group: other
@ -122,7 +122,7 @@ jobs:
- name: setup java
uses: actions/setup-java@v4
with:
java-version: '8'
java-version: '17'
distribution: 'zulu'
cache: maven

View File

@ -66,8 +66,8 @@ jobs:
strategy:
fail-fast: false
matrix:
#jdk: [8, 11, 17]
jdk: [8]
# jdk: [11, 17]
jdk: [17]
it: [HighAvailability, MultiStageQuery, Catalog, BatchIndex, MultiStageQueryWithMM, InputSource, InputFormat, Security, Query]
#indexer: [indexer, middleManager]
indexer: [middleManager]
@ -86,8 +86,8 @@ jobs:
uses: ./.github/workflows/reusable-revised-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 11
build_jdk: 17
runtime_jdk: 21.0.4
use_indexer: middleManager
script: ./it.sh github S3DeepStorage
it: S3DeepStorage
@ -103,8 +103,8 @@ jobs:
uses: ./.github/workflows/reusable-revised-its.yml
if: ${{ inputs.BACKWARD_COMPATIBILITY_IT_ENABLED == 'true' && (needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true') }}
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
use_indexer: middleManager
script: ./it.sh github BackwardCompatibilityMain
it: BackwardCompatibilityMain

View File

@ -51,8 +51,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
testing_groups: -Dgroups=${{ matrix.testing_group }}
override_config_path: ./environment-configs/test-groups/prepopulated-data
use_indexer: middleManager
@ -67,8 +67,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
testing_groups: -Dgroups=${{ matrix.testing_group }}
use_indexer: indexer
group: ${{ matrix.testing_group }}
@ -82,8 +82,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
testing_groups: -Dgroups=${{ matrix.testing_group }}
use_indexer: middleManager
override_config_path: ./environment-configs/test-groups/prepopulated-data
@ -94,7 +94,7 @@ jobs:
strategy:
fail-fast: false
matrix:
jdk: [8, 17, 21]
jdk: [11, 17, 21]
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
@ -115,8 +115,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
testing_groups: -Dgroups=shuffle-deep-store
use_indexer: ${{ matrix.indexer }}
override_config_path: ./environment-configs/test-groups/shuffle-deep-store
@ -127,8 +127,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
testing_groups: -Dgroups=custom-coordinator-duties
use_indexer: middleManager
override_config_path: ./environment-configs/test-groups/custom-coordinator-duties
@ -136,7 +136,7 @@ jobs:
integration-k8s-leadership-tests:
needs: changes
name: (Compile=openjdk8, Run=openjdk8, Cluster Build On K8s) ITNestedQueryPushDownTest integration test
name: (Compile=openjdk17, Run=openjdk17, Cluster Build On K8s) ITNestedQueryPushDownTest integration test
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
runs-on: ubuntu-22.04
env:
@ -154,7 +154,7 @@ jobs:
- name: setup java
uses: actions/setup-java@v4
with:
java-version: '8'
java-version: '17'
distribution: 'zulu'
# the build step produces SNAPSHOT artifacts into the local maven repository,
@ -195,8 +195,8 @@ jobs:
indexer: [middleManager, indexer]
uses: ./.github/workflows/reusable-standard-its.yml
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties,centralized-datasource-schema,cds-task-schema-publish-disabled,cds-coordinator-metadata-query-disabled
use_indexer: ${{ matrix.indexer }}
group: other

View File

@ -42,7 +42,7 @@ jobs:
fail-fast: false
matrix:
# Use JDK 21.0.4 to work around https://github.com/apache/druid/issues/17429
java: [ '8', '11', '17', '21.0.4' ]
java: [ '11', '17', '21.0.4' ]
runs-on: ubuntu-latest
steps:
- name: checkout branch
@ -64,7 +64,7 @@ jobs:
- name: script checks
# who watches the watchers?
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ./check_test_suite_test.py
- name: (openjdk17) strict compilation
@ -74,43 +74,43 @@ jobs:
run: ${MVN} clean -DstrictCompile compile test-compile --fail-at-end ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS}
- name: maven install
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: |
echo 'Running Maven install...' &&
${MVN} clean install -q -ff -pl '!distribution' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} -T1C &&
${MVN} install -q -ff -pl 'distribution' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS}
- name: checkstyle
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ${MVN} checkstyle:checkstyle --fail-at-end
- name: license checks
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ./.github/scripts/license_checks_script.sh
- name: analyze dependencies
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: |
./.github/scripts/analyze_dependencies_script.sh
- name: animal sniffer checks
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ${MVN} animal-sniffer:check --fail-at-end
- name: enforcer checks
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ${MVN} enforcer:enforce --fail-at-end
- name: forbidden api checks
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ${MVN} forbiddenapis:check forbiddenapis:testCheck --fail-at-end
- name: pmd checks
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ${MVN} pmd:check --fail-at-end # TODO: consider adding pmd:cpd-check
- name: spotbugs checks
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ${MVN} spotbugs:check --fail-at-end -pl '!benchmarks'
openrewrite:
@ -122,7 +122,7 @@ jobs:
- uses: actions/setup-java@v4
with:
distribution: 'zulu'
java-version: '8'
java-version: '17'
cache: 'maven'
- name: maven install

View File

@ -80,7 +80,7 @@ jobs:
fail-fast: false
matrix:
# Use JDK 21.0.4 to work around https://github.com/apache/druid/issues/17429
jdk: [ '8', '11', '17', '21.0.4' ]
jdk: [ '11', '17', '21.0.4' ]
runs-on: ubuntu-latest
steps:
- name: Checkout branch
@ -162,7 +162,7 @@ jobs:
fail-fast: false
matrix:
# Use JDK 21.0.4 to work around https://github.com/apache/druid/issues/17429
jdk: [ '11', '17', '21.0.4' ]
jdk: [ '11', '21.0.4' ]
name: "unit tests (jdk${{ matrix.jdk }}, sql-compat=true)"
uses: ./.github/workflows/unit-tests.yml
needs: unit-tests
@ -176,11 +176,11 @@ jobs:
fail-fast: false
matrix:
sql_compatibility: [ false, true ]
name: "unit tests (jdk8, sql-compat=${{ matrix.sql_compatibility }})"
name: "unit tests (jdk17, sql-compat=${{ matrix.sql_compatibility }})"
uses: ./.github/workflows/unit-tests.yml
needs: build
with:
jdk: 8
jdk: 17
sql_compatibility: ${{ matrix.sql_compatibility }}
standard-its:

View File

@ -84,7 +84,7 @@
<resource url="http://maven.apache.org/ASSEMBLY/2.0.0" location="$PROJECT_DIR$/.idea/xml-schemas/assembly-2.0.0.xsd" />
<resource url="http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd" location="$PROJECT_DIR$/.idea/xml-schemas/svg11.dtd" />
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="false" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<component name="ProjectRootManager" version="2" languageLevel="JDK_17" default="false" project-jdk-name="17" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/classes" />
</component>
</project>

View File

@ -106,7 +106,7 @@ Find articles written by community members and a calendar of upcoming events on
### Building from source
Please note that JDK 8 or JDK 11 is required to build Druid.
Please note that JDK 11 or JDK 17 is required to build Druid.
See the latest [build guide](https://druid.apache.org/docs/latest/development/build.html) for instructions on building Apache Druid from source.

View File

@ -64,6 +64,7 @@
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-multibindings</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
@ -239,7 +240,6 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<jmh.version>1.21</jmh.version>
<javac.target>1.8</javac.target>
<uberjar.name>benchmarks</uberjar.name>
</properties>

View File

@ -37,6 +37,7 @@ import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryQueryToolChest;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.column.ColumnType;
@ -115,7 +116,8 @@ public class GroupByDeserializationBenchmark
}
},
null,
null
null,
new GroupByStatsProvider()
);
decoratedMapper = groupByQueryQueryToolChest.decorateObjectMapper(undecoratedMapper, sqlQuery);

View File

@ -55,6 +55,7 @@ import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryQueryToolChest;
import org.apache.druid.query.groupby.GroupByQueryRunnerFactory;
import org.apache.druid.query.groupby.GroupByResourcesReservationPool;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.GroupingEngine;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
@ -373,6 +374,7 @@ public class GroupByTypeInterfaceBenchmark
};
final Supplier<GroupByQueryConfig> configSupplier = Suppliers.ofInstance(config);
final GroupByStatsProvider groupByStatsProvider = new GroupByStatsProvider();
final GroupByResourcesReservationPool groupByResourcesReservationPool =
new GroupByResourcesReservationPool(mergePool, config);
final GroupingEngine groupingEngine = new GroupingEngine(
@ -381,7 +383,8 @@ public class GroupByTypeInterfaceBenchmark
groupByResourcesReservationPool,
TestHelper.makeJsonMapper(),
new ObjectMapper(new SmileFactory()),
QueryBenchmarkUtil.NOOP_QUERYWATCHER
QueryBenchmarkUtil.NOOP_QUERYWATCHER,
groupByStatsProvider
);
factory = new GroupByQueryRunnerFactory(

View File

@ -79,6 +79,7 @@ import org.apache.druid.query.groupby.GroupByQueryQueryToolChest;
import org.apache.druid.query.groupby.GroupByQueryRunnerFactory;
import org.apache.druid.query.groupby.GroupByQueryRunnerTest;
import org.apache.druid.query.groupby.GroupByResourcesReservationPool;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.GroupingEngine;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.planning.DataSourceAnalysis;
@ -343,6 +344,7 @@ public class CachingClusteredClientBenchmark
bufferSupplier,
processingConfig.getNumMergeBuffers()
);
final GroupByStatsProvider groupByStatsProvider = new GroupByStatsProvider();
final GroupByResourcesReservationPool groupByResourcesReservationPool =
new GroupByResourcesReservationPool(mergeBufferPool, config);
final GroupingEngine groupingEngine = new GroupingEngine(
@ -351,7 +353,8 @@ public class CachingClusteredClientBenchmark
groupByResourcesReservationPool,
mapper,
mapper,
QueryRunnerTestHelper.NOOP_QUERYWATCHER
QueryRunnerTestHelper.NOOP_QUERYWATCHER,
groupByStatsProvider
);
final GroupByQueryQueryToolChest toolChest = new GroupByQueryQueryToolChest(groupingEngine, groupByResourcesReservationPool);
return new GroupByQueryRunnerFactory(groupingEngine, toolChest, bufferPool);

View File

@ -66,6 +66,7 @@ import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryQueryToolChest;
import org.apache.druid.query.groupby.GroupByQueryRunnerFactory;
import org.apache.druid.query.groupby.GroupByResourcesReservationPool;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.GroupingEngine;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
@ -490,6 +491,7 @@ public class GroupByBenchmark
};
final Supplier<GroupByQueryConfig> configSupplier = Suppliers.ofInstance(config);
final GroupByStatsProvider groupByStatsProvider = new GroupByStatsProvider();
final GroupByResourcesReservationPool groupByResourcesReservationPool =
new GroupByResourcesReservationPool(mergePool, config);
final GroupingEngine groupingEngine = new GroupingEngine(
@ -498,7 +500,8 @@ public class GroupByBenchmark
groupByResourcesReservationPool,
TestHelper.makeJsonMapper(),
new ObjectMapper(new SmileFactory()),
QueryBenchmarkUtil.NOOP_QUERYWATCHER
QueryBenchmarkUtil.NOOP_QUERYWATCHER,
groupByStatsProvider
);
factory = new GroupByQueryRunnerFactory(

View File

@ -22,7 +22,7 @@ import sys
# this script does some primitive examination of git diff to determine if a test suite needs to be run or not
# these jobs should always be run, no matter what
always_run_jobs = ['license checks', '(openjdk8) packaging check', '(openjdk11) packaging check']
always_run_jobs = ['license checks', '(openjdk17) packaging check']
# ignore changes to these files completely since they don't impact CI, if the changes are only to these files then all
# of CI can be skipped. however, jobs which are always run will still be run even if only these files are changed

View File

@ -29,5 +29,4 @@ This ruleset defines the PMD rules for the Apache Druid project.
<rule ref="category/java/codestyle.xml/UnnecessaryImport" />
<rule ref="category/java/codestyle.xml/TooManyStaticImports" />
<rule ref="category/java/codestyle.xml/UnnecessaryFullyQualifiedName"/>
</ruleset>

View File

@ -137,4 +137,15 @@
<Bug pattern="SWL_SLEEP_WITH_LOCK_HELD"/>
<Bug pattern="UL_UNRELEASED_LOCK_EXCEPTION_PATH"/>
<Bug pattern="URF_UNREAD_FIELD"/>
<!-- The following patterns have been excluded as part of upgrading to Java 17 as there were 100s of occurrences.
We should revisit these later. -->
<Bug pattern="CT_CONSTRUCTOR_THROW"/>
<Bug pattern="SING_SINGLETON_HAS_NONPRIVATE_CONSTRUCTOR"/>
<Bug pattern="DCN_NULLPOINTER_EXCEPTION"/>
<Bug pattern="SING_SINGLETON_INDIRECTLY_IMPLEMENTS_CLONEABLE"/>
<Bug pattern="MS_EXPOSE_REP"/>
<Bug pattern="PA_PUBLIC_PRIMITIVE_ATTRIBUTE"/>
<Bug pattern="EI_EXPOSE_STATIC_REP2"/>
<Bug pattern="SS_SHOULD_BE_STATIC"/>
<Bug pattern="SING_SINGLETON_IMPLEMENTS_SERIALIZABLE"/>
</FindBugsFilter>

View File

@ -23,7 +23,7 @@ ARG JDK_VERSION=17
# This is because it's not able to build the distribution on arm64 due to dependency problem of web-console. See: https://github.com/apache/druid/issues/13012
# Since only java jars are shipped in the final image, it's OK to build the distribution on x64.
# Once the web-console dependency problem is resolved, we can remove the --platform directive.
FROM --platform=linux/amd64 maven:3.8.6-jdk-11-slim as builder
FROM --platform=linux/amd64 maven:3.8.4-openjdk-17-slim as builder
# Rebuild from source in this stage
# This can be unset if the tarball was already built outside of Docker

View File

@ -195,9 +195,9 @@ and `druid.tlsPort` properties on each service. Please see `Configuration` secti
Druid uses Jetty as an embedded web server. To learn more about TLS/SSL, certificates, and related concepts in Jetty, including explanations of the configuration settings below, see "Configuring SSL/TLS KeyStores" in the [Jetty Operations Guide](https://www.eclipse.org/jetty/documentation.php).
For information about TLS/SSL support in Java in general, see the [Java Secure Socket Extension (JSSE) Reference Guide](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html).
For information about TLS/SSL support in Java in general, see the [Java Secure Socket Extension (JSSE) Reference Guide](https://docs.oracle.com/en/java/javase/11/security/java-secure-socket-extension-jsse-reference-guide.html).
The [Java Cryptography Architecture
Standard Algorithm Name Documentation for JDK 8](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html) lists all possible
Standard Algorithm Name Documentation for JDK 11](https://docs.oracle.com/en/java/javase/11/docs/specs/security/standard-names.html) lists all possible
values for the following properties, among others provided by the Java implementation.
|Property|Description|Default|Required|
@ -230,7 +230,7 @@ These properties apply to the SSLContext that will be provided to the internal H
|`druid.client.https.trustStoreAlgorithm`|Algorithm to be used by TrustManager to validate certificate chains|`javax.net.ssl.TrustManagerFactory.getDefaultAlgorithm()`|no|
|`druid.client.https.trustStorePassword`|The [Password Provider](../operations/password-provider.md) or String password for the Trust Store.|none|yes|
This [document](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html) lists all the possible
This [document](https://docs.oracle.com/en/java/javase/11/docs/specs/security/standard-names.html) lists all the possible
values for the above mentioned configs among others provided by Java implementation.
### Authentication and authorization
@ -408,6 +408,7 @@ Metric monitoring is an essential part of Druid operations. The following monito
|`org.apache.druid.server.metrics.TaskSlotCountStatsMonitor`|Reports metrics about task slot usage per emission period.|
|`org.apache.druid.server.metrics.WorkerTaskCountStatsMonitor`|Reports how many ingestion tasks are currently running/pending/waiting, the number of successful/failed tasks, and metrics about task slot usage for the reporting worker, per emission period. Only supported by Middle Manager node types.|
|`org.apache.druid.server.metrics.ServiceStatusMonitor`|Reports a heartbeat for the service.|
|`org.apache.druid.server.metrics.GroupByStatsMonitor`|Reports metrics for groupBy queries, such as disk and merge buffer utilization.|
For example, you might configure monitors on all services for system and JVM information within `common.runtime.properties` as follows:
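A minimal sketch of that configuration, assuming the standard `JvmMonitor` and `SysMonitor` class names (not shown in this hunk); the JSON array below is the value assigned to `druid.monitoring.monitors` in `common.runtime.properties`:
```json
[
  "org.apache.druid.java.util.metrics.JvmMonitor",
  "org.apache.druid.java.util.metrics.SysMonitor"
]
```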

View File

@ -23,9 +23,9 @@ title: "Simple SSLContext Provider Module"
-->
This Apache Druid module contains a simple implementation of [SSLContext](http://docs.oracle.com/javase/8/docs/api/javax/net/ssl/SSLContext.html)
This Apache Druid module contains a simple implementation of [SSLContext](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/javax/net/ssl/SSLContext.html)
that will be injected to be used with HttpClient that Druid processes use internally to communicate with each other. To learn more about
Java's SSL support, please refer to [this](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html) guide.
Java's SSL support, please refer to [this](https://docs.oracle.com/en/java/javase/11/security/java-secure-socket-extension-jsse-reference-guide.html) guide.
|Property|Description|Default|Required|
@ -48,5 +48,5 @@ The following table contains optional parameters for supporting client certifica
|`druid.client.https.keyManagerPassword`|The [Password Provider](../../operations/password-provider.md) or String password for the Key Manager.|none|no|
|`druid.client.https.validateHostnames`|Validate the hostname of the server. This should not be disabled unless you are using [custom TLS certificate checks](../../operations/tls-support.md) and know that standard hostname validation is not needed.|true|no|
This [document](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html) lists all the possible
This [document](https://docs.oracle.com/en/java/javase/11/docs/specs/security/standard-names.html) lists all the possible
values for the above mentioned configs among others provided by Java implementation.

View File

@ -184,7 +184,7 @@ Sample specs:
|uris|JSON array of URIs where S3 objects to be ingested are located.|None|`uris` or `prefixes` or `objects` must be set|
|prefixes|JSON array of URI prefixes for the locations of S3 objects to be ingested. Empty objects starting with one of the given prefixes will be skipped.|None|`uris` or `prefixes` or `objects` must be set|
|objects|JSON array of S3 Objects to be ingested.|None|`uris` or `prefixes` or `objects` must be set|
|objectGlob|A glob for the object part of the S3 URI. In the URI `s3://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `s3://foo/bar/file.json`, because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-).|None|no|
|objectGlob|A glob for the object part of the S3 URI. In the URI `s3://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `s3://foo/bar/file.json`, because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/nio/file/FileSystem.html#getPathMatcher(java.lang.String)).|None|no|
|systemFields|JSON array of system fields to return as part of input rows. Possible values: `__file_uri` (S3 URI starting with `s3://`), `__file_bucket` (S3 bucket), and `__file_path` (S3 object key).|None|no|
| endpointConfig |Config for overriding the default S3 endpoint and signing region. This would allow ingesting data from a different S3 store. Please see [s3 config](../development/extensions-core/s3.md#connecting-to-s3-configuration) for more information.|None|No (defaults will be used if not given)
| clientConfig |S3 client properties for the overridden s3 endpoint. This is used in conjunction with `endPointConfig`. Please see [s3 config](../development/extensions-core/s3.md#connecting-to-s3-configuration) for more information.|None|No (defaults will be used if not given)
@ -289,7 +289,7 @@ Sample specs:
|uris|JSON array of URIs where Google Cloud Storage objects to be ingested are located.|None|`uris` or `prefixes` or `objects` must be set|
|prefixes|JSON array of URI prefixes for the locations of Google Cloud Storage objects to be ingested. Empty objects starting with one of the given prefixes will be skipped.|None|`uris` or `prefixes` or `objects` must be set|
|objects|JSON array of Google Cloud Storage objects to be ingested.|None|`uris` or `prefixes` or `objects` must be set|
|objectGlob|A glob for the object part of the S3 URI. In the URI `s3://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `s3://foo/bar/file.json`, because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-).|None|no|
|objectGlob|A glob for the object part of the S3 URI. In the URI `s3://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `s3://foo/bar/file.json`, because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/nio/file/FileSystem.html#getPathMatcher(java.lang.String)).|None|no|
Note that the Google Cloud Storage input source will skip all empty objects only when `prefixes` is specified.
@ -377,7 +377,7 @@ Sample specs:
|uris|JSON array of URIs where the Azure objects to be ingested are located. Use this format: `azureStorage://STORAGE_ACCOUNT/CONTAINER/PATH_TO_FILE`|None|One of the following must be set:`uris`, `prefixes`, or `objects`.|
|prefixes|JSON array of URI prefixes for the locations of Azure objects to ingest. Use this format`azureStorage://STORAGE_ACCOUNT/CONTAINER/PREFIX`. Empty objects starting with any of the given prefixes are skipped.|None|One of the following must be set:`uris`, `prefixes`, or `objects`.|
|objects|JSON array of Azure objects to ingest.|None|One of the following must be set:`uris`, `prefixes`, or `objects`.|
|objectGlob|A glob for the object part of the Azure URI. In the URI `azureStorage://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `azureStorage://foo/bar/file.json` because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-).|None|no|
|objectGlob|A glob for the object part of the Azure URI. In the URI `azureStorage://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `azureStorage://foo/bar/file.json` because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/nio/file/FileSystem.html#getPathMatcher(java.lang.String)).|None|no|
|systemFields|JSON array of system fields to return as part of input rows. Possible values: `__file_uri` (Azure blob URI starting with `azureStorage://`), `__file_bucket` (Azure bucket), and `__file_path` (Azure object path).|None|no|
|properties|Properties object for overriding the default Azure configuration. See below for more information.|None|No (defaults will be used if not given)|
@ -471,7 +471,7 @@ Sample specs:
|uris|JSON array of URIs where the Azure objects to be ingested are located, in the form `azure://<container>/<path-to-file>`|None|`uris` or `prefixes` or `objects` must be set|
|prefixes|JSON array of URI prefixes for the locations of Azure objects to ingest, in the form `azure://<container>/<prefix>`. Empty objects starting with one of the given prefixes are skipped.|None|`uris` or `prefixes` or `objects` must be set|
|objects|JSON array of Azure objects to ingest.|None|`uris` or `prefixes` or `objects` must be set|
|objectGlob|A glob for the object part of the Azure URI. In the URI `azure://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `azure://foo/bar/file.json`, because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-).|None|no|
|objectGlob|A glob for the object part of the Azure URI. In the URI `azure://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `azure://foo/bar/file.json`, because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/nio/file/FileSystem.html#getPathMatcher(java.lang.String)).|None|no|
|systemFields|JSON array of system fields to return as part of input rows. Possible values: `__file_uri` (Azure blob URI starting with `azure://`), `__file_bucket` (Azure bucket), and `__file_path` (Azure object path).|None|no|
Note that the Azure input source skips all empty objects only when `prefixes` is specified.

View File

@ -143,41 +143,54 @@ Similarly, you can use `metrics-.*` as the value for `topicPattern` if you want
#### Consumer properties
Consumer properties control how a supervisor reads and processes event messages from a Kafka stream. For more information about consumers, refer to the [Apache Kafka documentation](https://kafka.apache.org/documentation/#consumerconfigs).
Consumer properties control how a supervisor reads and processes event messages from a Kafka stream. For more information about consumer configuration and advanced use cases, refer to the [Kafka documentation](https://kafka.apache.org/documentation/#consumerconfigs).
The `consumerProperties` object must contain a `bootstrap.servers` property with a list of Kafka brokers in the form: `<BROKER_1>:<PORT_1>,<BROKER_2>:<PORT_2>,...`.
By default, `isolation.level` is set to `read_committed`.
You must include `bootstrap.servers` in consumer properties with a list of Kafka brokers in the format `<BROKER_1>:<PORT_1>,<BROKER_2>:<PORT_2>,...`.
In some cases, you may need to retrieve consumer properties at runtime. For example, when `bootstrap.servers` is unknown or not static.
If you use older versions of Kafka servers without transactions support or don't want Druid to consume only committed transactions, set `isolation.level` to `read_uncommitted`. If you need Druid to consume older versions of Kafka, make sure offsets are sequential, since there is no offset gap check in Druid.
The `isolation.level` property in `consumerProperties` determines how Druid reads messages written transactionally.
If you use older versions of Kafka servers without transaction support or you don't want Druid to consume only committed transactions, set `isolation.level` to `read_uncommitted`.
With `read_uncommitted`, which is the default setting, Druid reads all messages, including aborted transactional messages.
Make sure offsets are sequential, since there is no offset gap check in Druid.
For Druid to consume only committed transactional messages, set `isolation.level` to `read_committed`.
If your Kafka cluster enables consumer-group based ACLs, you can set `group.id` in `consumerProperties` to override the default auto generated group ID.
If your Kafka cluster enables consumer group ACLs, you can set `group.id` in `consumerProperties` to override the default auto generated group ID.
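Taken together, a minimal `consumerProperties` sketch might look like the following; the broker list and group ID are placeholder values:
```json
"consumerProperties": {
  "bootstrap.servers": "kafkabroker01:9092,kafkabroker02:9092",
  "isolation.level": "read_committed",
  "group.id": "my-druid-consumers"
}
```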
In some cases, you may need to fetch consumer properties at runtime. For example, when `bootstrap.servers` is not known upfront or is not static. To enable SSL connections, you must provide passwords for `keystore`, `truststore`, and `key` secretly. You can provide configurations at runtime with a dynamic config provider implementation like the environment variable config provider that comes with Druid. For more information, see [Dynamic config provider](../operations/dynamic-config-provider.md).
To enable SSL connections, you must provide passwords for `keystore`, `truststore`, and `key` confidentially. You can specify these settings in the `jaas.conf` login configuration file or in `consumerProperties` with `sasl.jaas.config`.
To protect sensitive information, use the [environment variable dynamic config provider](../operations/dynamic-config-provider.md#environment-variable-dynamic-config-provider) to store credentials in system environment variables instead of plain text.
Although you can also use the [password provider](../operations/password-provider.md) interface to specify SSL configuration for Kafka ingestion, consider using the dynamic config provider as this feature is deprecated.
For example, if you are using SASL and SSL with Kafka, set the following environment variables for the Druid user on the machines running the Overlord and the Peon services:
For example, when using SASL and SSL with Kafka, set the following environment variables for the Druid user on machines running the Overlord and Peon services. Replace the values to match your environment configurations.
```
export KAFKA_JAAS_CONFIG="org.apache.kafka.common.security.plain.PlainLoginModule required username='admin_user' password='admin_password';"
export KAFKA_JAAS_CONFIG="org.apache.kafka.common.security.plain.PlainLoginModule required username='accesskey' password='secret key';"
export SSL_KEY_PASSWORD=mysecretkeypassword
export SSL_KEYSTORE_PASSWORD=mysecretkeystorepassword
export SSL_TRUSTSTORE_PASSWORD=mysecrettruststorepassword
```
When you define the consumer properties in the supervisor spec, use the dynamic config provider to refer to the environment variables:
```json
"druid.dynamic.config.provider": {
"type": "environment",
"variables": {
"sasl.jaas.config": "KAFKA_JAAS_CONFIG",
"ssl.key.password": "SSL_KEY_PASSWORD",
"ssl.keystore.password": "SSL_KEYSTORE_PASSWORD",
"ssl.truststore.password": "SSL_TRUSTSTORE_PASSWORD"
"consumerProperties": {
"bootstrap.servers": "localhost:9092",
"security.protocol": "SASL_SSL",
"sasl.mechanism": "PLAIN",
"ssl.keystore.location": "/opt/kafka/config/kafka01.keystore.jks",
"ssl.truststore.location": "/opt/kafka/config/kafka.truststore.jks",
"druid.dynamic.config.provider": {
"type": "environment",
"variables": {
"sasl.jaas.config": "KAFKA_JAAS_CONFIG",
"ssl.key.password": "SSL_KEY_PASSWORD",
"ssl.keystore.password": "SSL_KEYSTORE_PASSWORD",
"ssl.truststore.password": "SSL_TRUSTSTORE_PASSWORD"
}
}
}
```
Verify that you've changed the values for all configurations to match your own environment. In the Druid data loader interface, you can use the environment variable config provider syntax in the **Consumer properties** field on the **Connect tab**. When connecting to Kafka, Druid replaces the environment variables with their corresponding values.
You can provide SSL connections with [Password provider](../operations/password-provider.md) interface to define the `keystore`, `truststore`, and `key`, but this feature is deprecated.
When connecting to Kafka, Druid replaces the environment variables with their corresponding values.
#### Idle configuration

View File

@ -27,7 +27,11 @@ a Java runtime for Druid.
## Selecting a Java runtime
Druid fully supports Java 8u92+, Java 11, and Java 17. The project team recommends Java 17.
Druid fully supports Java 11 and Java 17. The project team recommends Java 17.
:::info
Note: Starting with Apache Druid 32.0.0, support for Java 8 has been removed.
:::
The project team recommends using an OpenJDK-based Java distribution. There are many free and actively-supported
distributions available, including

View File

@ -62,7 +62,6 @@ Most metric values reset each emission period, as specified in `druid.monitoring
|`query/failed/count`|Number of failed queries.|This metric is only available if the `QueryCountStatsMonitor` module is included.| |
|`query/interrupted/count`|Number of queries interrupted due to cancellation.|This metric is only available if the `QueryCountStatsMonitor` module is included.| |
|`query/timeout/count`|Number of timed out queries.|This metric is only available if the `QueryCountStatsMonitor` module is included.| |
|`mergeBuffer/pendingRequests`|Number of requests waiting to acquire a batch of buffers from the merge buffer pool.|This metric is only available if the `QueryCountStatsMonitor` module is included.| |
|`query/segments/count`|This metric is not enabled by default. See the `QueryMetrics` Interface for reference regarding enabling this metric. Number of segments that will be touched by the query. In the broker, it makes a plan to distribute the query to realtime tasks and historicals based on a snapshot of segment distribution state. If there are some segments moved after this snapshot is created, certain historicals and realtime tasks can report those segments as missing to the broker. The broker will resend the query to the new servers that serve those segments after move. In this case, those segments can be counted more than once in this metric.||Varies|
|`query/priority`|Assigned lane and priority, only if Laning strategy is enabled. Refer to [Laning strategies](../configuration/index.md#laning-strategies)|`lane`, `dataSource`, `type`|0|
|`sqlQuery/time`|Milliseconds taken to complete a SQL query.|`id`, `nativeQueryIds`, `dataSource`, `remoteAddress`, `success`, `engine`|< 1s|
@ -103,7 +102,6 @@ Most metric values reset each emission period, as specified in `druid.monitoring
|`query/failed/count`|Number of failed queries.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
|`query/interrupted/count`|Number of queries interrupted due to cancellation.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
|`query/timeout/count`|Number of timed out queries.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
|`mergeBuffer/pendingRequests`|Number of requests waiting to acquire a batch of buffers from the merge buffer pool.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
### Real-time
@ -119,7 +117,18 @@ Most metric values reset each emission period, as specified in `druid.monitoring
|`query/failed/count`|Number of failed queries.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
|`query/interrupted/count`|Number of queries interrupted due to cancellation.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
|`query/timeout/count`|Number of timed out queries.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
|`mergeBuffer/pendingRequests`|Number of requests waiting to acquire a batch of buffers from the merge buffer pool.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
### GroupBy query metrics
These metrics are reported from the Broker, Historical, and real-time nodes.

|Metric|Description|Dimensions|Normal value|
|------|-----------|----------|------------|
|`mergeBuffer/pendingRequests`|Number of requests waiting to acquire a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Ideally 0, though a higher number does not necessarily indicate a problem.|
|`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.|
|`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.|
|`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies|
|`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies|
|`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies|
|`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in bytes.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies|
### Jetty

View File

@ -37,10 +37,10 @@ Apache Druid uses Jetty as its embedded web server.
To get familiar with TLS/SSL, along with related concepts like keys and certificates,
read [Configuring Secure Protocols](https://www.eclipse.org/jetty/documentation/jetty-12/operations-guide/index.html#og-protocols-ssl) in the Jetty documentation.
To get more in-depth knowledge of TLS/SSL support in Java in general, refer to the [Java Secure Socket Extension (JSSE) Reference Guide](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html).
To get more in-depth knowledge of TLS/SSL support in Java in general, refer to the [Java Secure Socket Extension (JSSE) Reference Guide](https://docs.oracle.com/en/java/javase/11/security/java-secure-socket-extension-jsse-reference-guide.html).
The [Class SslContextFactory](https://www.eclipse.org/jetty/javadoc/jetty-11/org/eclipse/jetty/util/ssl/SslContextFactory.html)
reference doc can help in understanding TLS/SSL configurations listed below. Finally, [Java Cryptography Architecture
Standard Algorithm Name Documentation for JDK 8](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html) lists all possible
Standard Algorithm Name Documentation for JDK 11](https://docs.oracle.com/en/java/javase/11/docs/specs/security/standard-names.html) lists all possible
values for the configs below, among others provided by Java implementation.
|Property|Description|Default|Required|
@ -79,7 +79,7 @@ The following table contains non-mandatory advanced configuration options, use c
## Internal communication over TLS
Whenever possible Druid processes will use HTTPS to talk to each other. To enable this communication Druid's HttpClient needs to
be configured with a proper [SSLContext](http://docs.oracle.com/javase/8/docs/api/javax/net/ssl/SSLContext.html) that is able
be configured with a proper [SSLContext](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/javax/net/ssl/SSLContext.html) that is able
to validate the Server Certificates, otherwise communication will fail.
Since, there are various ways to configure SSLContext, by default, Druid looks for an instance of SSLContext Guice binding

View File

@ -255,7 +255,7 @@ For a regular dimension, it assumes the string is formatted in
[ISO-8601 date and time format](https://en.wikipedia.org/wiki/ISO_8601).
* `format` : date time format for the resulting dimension value, in [Joda Time DateTimeFormat](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html), or null to use the default ISO8601 format.
* `locale` : locale (language and country) to use, given as a [IETF BCP 47 language tag](http://www.oracle.com/technetwork/java/javase/java8locales-2095355.html#util-text), e.g. `en-US`, `en-GB`, `fr-FR`, `fr-CA`, etc.
* `locale` : locale (language and country) to use, given as a [IETF BCP 47 language tag](https://www.oracle.com/java/technologies/javase/jdk11-suported-locales.html#util-text), e.g. `en-US`, `en-GB`, `fr-FR`, `fr-CA`, etc.
* `timeZone` : time zone to use in [IANA tz database format](http://en.wikipedia.org/wiki/List_of_tz_database_time_zones), e.g. `Europe/Berlin` (this can possibly be different than the aggregation time-zone)
* `granularity` : [granularity](granularities.md) to apply before formatting, or omit to not apply any granularity.
* `asMillis` : boolean value, set to true to treat input strings as millis rather than ISO8601 strings. Additionally, if `format` is null or not specified, output will be in millis rather than ISO8601.
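For illustration, a sketch of such an extraction function; the `timeFormat` type name and the sample field values are assumptions not shown in this hunk:
```json
{
  "type": "timeFormat",
  "format": "EEEE",
  "locale": "en-US",
  "timeZone": "Europe/Berlin",
  "granularity": "day",
  "asMillis": false
}
```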

View File

@ -439,7 +439,7 @@ The regular expression filter is similar to the selector filter, but using regul
| -------- | ----------- | -------- |
| `type` | Must be "regex".| Yes |
| `dimension` | Input column or virtual column name to filter on. | Yes |
| `pattern` | String pattern to match - any standard [Java regular expression](http://docs.oracle.com/javase/6/docs/api/java/util/regex/Pattern.html). | Yes |
| `pattern` | String pattern to match - any standard [Java regular expression](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html). | Yes |
| `extractionFn` | [Extraction function](./dimensionspecs.md#extraction-functions) to apply to `dimension` prior to value matching. See [filtering with extraction functions](#filtering-with-extraction-functions) for details. | No |
Note that it is often more optimal to use a like filter instead of a regex for simple matching of prefixes.
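For example, a minimal regex filter over a hypothetical `countryName` column might look like this sketch (a prefix match that, per the note above, could also be expressed with a like filter):
```json
{
  "type": "regex",
  "dimension": "countryName",
  "pattern": "^United.*"
}
```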

View File

@ -81,7 +81,7 @@ The following built-in functions are available.
|name|description|
|----|-----------|
|concat|concat(expr, expr...) concatenate a list of strings|
|format|format(pattern[, args...]) returns a string formatted in the manner of Java's [String.format](https://docs.oracle.com/javase/8/docs/api/java/lang/String.html#format-java.lang.String-java.lang.Object...-).|
|format|format(pattern[, args...]) returns a string formatted in the manner of Java's [String.format](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object...)).|
|like|like(expr, pattern[, escape]) is equivalent to SQL `expr LIKE pattern`|
|lookup|lookup(expr, lookup-name[, replaceMissingValueWith]) looks up expr in a registered [query-time lookup](../querying/lookups.md); `replaceMissingValueWith` is an optional constant string to return when the lookup has no match|
|parse_long|parse_long(string[, radix]) parses a string as a long with the given radix, or 10 (decimal) if a radix is not provided.|

View File

@ -115,7 +115,7 @@ String functions accept strings and return a type appropriate to the function.
|`REPLACE(expr, substring, replacement)`|Replaces instances of `substring` in `expr` with `replacement` and returns the result.|
|`REPEAT(expr, N)`|Repeats `expr` `N` times.|
|`REVERSE(expr)`|Reverses `expr`.|
|`STRING_FORMAT(pattern[, args...])`|Returns a string formatted in the manner of Java's [String.format](https://docs.oracle.com/javase/8/docs/api/java/lang/String.html#format-java.lang.String-java.lang.Object...-).|
|`STRING_FORMAT(pattern[, args...])`|Returns a string formatted in the manner of Java's [String.format](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object...)).|
|`STRPOS(expr, substring)`|Returns the index of `substring` within `expr`, with indexes starting from 1. If `substring` is not found, returns 0.|
|`SUBSTRING(expr, index[, length])`|Returns a substring of `expr` starting at a given one-based index. If `length` is omitted, extracts characters to the end of the string, otherwise returns a substring of `length` UTF-16 characters.|
|`SUBSTR(expr, index[, length])`|Alias for `SUBSTRING`.|

View File

@ -32,7 +32,7 @@ sidebar_label: "TopN"
Apache Druid TopN queries return a sorted set of results for the values in a given dimension according to some criteria. Conceptually, they can be thought of as an approximate [GroupByQuery](../querying/groupbyquery.md) over a single dimension with an [Ordering](../querying/limitspec.md) spec. TopNs are much faster and resource efficient than GroupBys for this use case. These types of queries take a topN query object and return an array of JSON objects where each object represents a value asked for by the topN query.
TopNs are approximate in that each data process will rank their top K results and only return those top K results to the Broker. K, by default in Druid, is `max(1000, threshold)`. In practice, this means that if you ask for the top 1000 items ordered, the correctness of the first ~900 items will be 100%, and the ordering of the results after that is not guaranteed. TopNs can be made more accurate by increasing the threshold.
TopNs are approximate in that each data process will rank their top K results and only return those top K results to the Broker. K, by default in Druid, is `max(1000, threshold)`.
A topN query object looks like:
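A minimal sketch follows; the datasource, dimension, and interval values are placeholders:
```json
{
  "queryType": "topN",
  "dataSource": "sample_data",
  "dimension": "sample_dim",
  "threshold": 5,
  "metric": "count",
  "granularity": "all",
  "aggregations": [
    { "type": "longSum", "name": "count", "fieldName": "count" }
  ],
  "intervals": [
    "2013-08-31T00:00:00.000/2013-09-03T00:00:00.000"
  ]
}
```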

View File

@ -133,7 +133,7 @@ The [basic cluster tuning guide](../operations/basic-cluster-tuning.md) has info
We recommend running your favorite Linux distribution. You will also need
* [Java 8u92+, 11, or 17](../operations/java.md)
* [Java 11 or 17](../operations/java.md)
* Python 2 or Python 3
:::info
@ -141,8 +141,8 @@ We recommend running your favorite Linux distribution. You will also need
`DRUID_JAVA_HOME` or `JAVA_HOME`. For more details run the `bin/verify-java` script.
:::
For information about installing Java, see the documentation for your OS package manager. If your Ubuntu-based OS does not have a recent enough version of Java, WebUpd8 offers [packages for those
OSes](http://www.webupd8.org/2012/09/install-oracle-java-8-in-ubuntu-via-ppa.html).
For information about installing Java, see the documentation for your OS package manager. If your Ubuntu-based OS does not have a recent enough version of Java, Linux Uprising offers [packages for those
OSes](https://launchpad.net/~linuxuprising/+archive/ubuntu/java).
## Download the distribution

View File

@ -40,7 +40,7 @@ You can follow these steps on a relatively modest machine, such as a workstation
The software requirements for the installation machine are:
* Linux, Mac OS X, or other Unix-like OS. (Windows is not supported)
* [Java 8u92+, 11, or 17](../operations/java.md)
* [Java 11 or 17](../operations/java.md)
* Python 3 (preferred) or Python 2
* Perl 5

View File

@ -28,14 +28,14 @@ sub fail_check {
: "No Java runtime was detected on your system.";
print STDERR <<"EOT";
Druid requires Java 8, 11, or 17. $current_version_text
Druid requires Java 11 or 17. $current_version_text
If you believe this check is in error, or you want to proceed with a potentially
unsupported Java runtime, you can skip this check using an environment variable:
export DRUID_SKIP_JAVA_CHECK=1
Otherwise, install Java 8, 11, or 17 in one of the following locations.
Otherwise, install Java 11 or 17 in one of the following locations.
* DRUID_JAVA_HOME
* JAVA_HOME
@ -68,6 +68,6 @@ if ($?) {
}
# If we know it won't work, die. Otherwise hope for the best.
if ($java_version =~ /version \"((\d+)\.(\d+).*?)\"/ && !($2 == 1 && $3 == 8) && $2 != 11 && $2 != 17 ) {
if ($java_version =~ /version \"((\d+)\.(\d+).*?)\"/ && $2 != 11 && $2 != 17) {
fail_check($1);
}

View File

@ -52,11 +52,11 @@ RUN rpm --import http://repos.azulsystems.com/RPM-GPG-KEY-azulsystems && \
rpm -ivh zulu-repo-${ZULU_REPO_VER}.noarch.rpm && \
yum -q -y update && \
yum -q -y upgrade && \
yum -q -y install zulu8-jdk && \
yum -q -y install zulu17-jdk && \
yum clean all && \
rm -rf /var/cache/yum zulu-repo_${ZULU_REPO_VER}.noarch.rpm
ENV JAVA_HOME=/usr/lib/jvm/zulu8
ENV JAVA_HOME=/usr/lib/jvm/zulu17
ENV PATH $PATH:$JAVA_HOME/bin
# hadoop
@ -73,7 +73,7 @@ ENV HADOOP_CONF_DIR /usr/local/hadoop/etc/hadoop
ENV YARN_CONF_DIR $HADOOP_HOME/etc/hadoop
# in hadoop 3 the example file is nearly empty so we can just append stuff
RUN sed -i '$ a export JAVA_HOME=/usr/lib/jvm/zulu8' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
RUN sed -i '$ a export JAVA_HOME=/usr/lib/jvm/zulu17' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
RUN sed -i '$ a export HADOOP_HOME=/usr/local/hadoop' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
RUN sed -i '$ a export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
RUN sed -i '$ a export HDFS_NAMENODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh

View File

@ -48,7 +48,6 @@
<dependency>
<groupId>com.google.inject</groupId>
<artifactId>guice</artifactId>
<version>${guice.version}</version>
<exclusions>
<exclusion>
<groupId>aopalliance</groupId>
@ -60,7 +59,7 @@
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-multibindings</artifactId>
<version>${guice.version}</version>
<scope>provided</scope>
<!--$NO-MVN-MAN-VER$ -->
</dependency>
<dependency>

View File

@ -145,7 +145,6 @@
<dependency>
<groupId>com.google.inject</groupId>
<artifactId>guice</artifactId>
<version>4.1.0</version>
<scope>provided</scope>
</dependency>
<dependency>

View File

@ -262,6 +262,12 @@
<artifactId>reflections</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>javax.annotation</groupId>
<artifactId>javax.annotation-api</artifactId>
<version>1.2</version>
<scope>provided</scope>
</dependency>
</dependencies>
<build>
<extensions>

View File

@ -20,6 +20,7 @@
package org.apache.druid.segment;
import com.google.common.collect.ImmutableList;
import org.apache.druid.collections.BlockingPool;
import org.apache.druid.collections.DefaultBlockingPool;
import org.apache.druid.collections.StupidPool;
import org.apache.druid.common.config.NullHandling;
@ -44,6 +45,7 @@ import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryQueryToolChest;
import org.apache.druid.query.groupby.GroupByQueryRunnerFactory;
import org.apache.druid.query.groupby.GroupByResourcesReservationPool;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.GroupingEngine;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
@ -70,8 +72,14 @@ public class MapVirtualColumnGroupByTest extends InitializedNullHandlingTest
{
final IncrementalIndex incrementalIndex = MapVirtualColumnTestBase.generateIndex();
final GroupByQueryConfig config = new GroupByQueryConfig();
final BlockingPool<ByteBuffer> mergePool =
new DefaultBlockingPool<>(() -> ByteBuffer.allocate(1024), 1);
final GroupByStatsProvider groupByStatsProvider = new GroupByStatsProvider();
final GroupByResourcesReservationPool groupByResourcesReservationPool =
new GroupByResourcesReservationPool(new DefaultBlockingPool<>(() -> ByteBuffer.allocate(1024), 1), config);
new GroupByResourcesReservationPool(mergePool, config);
final GroupingEngine groupingEngine = new GroupingEngine(
new DruidProcessingConfig()
{
@ -103,7 +111,8 @@ public class MapVirtualColumnGroupByTest extends InitializedNullHandlingTest
groupByResourcesReservationPool,
TestHelper.makeJsonMapper(),
new DefaultObjectMapper(),
QueryRunnerTestHelper.NOOP_QUERYWATCHER
QueryRunnerTestHelper.NOOP_QUERYWATCHER,
groupByStatsProvider
);
final GroupByQueryRunnerFactory factory = new GroupByQueryRunnerFactory(

View File

@ -173,13 +173,6 @@
<artifactId>equalsverifier</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-${mockito.inline.artifact}</artifactId>
<version>${mockito.version}</version>
<scope>test</scope>
</dependency>
<!-- explicitly declare mockito-core dependency to make anaylize-dependencies happy when running with Java 8 -->
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>

View File

@ -63,6 +63,7 @@ import org.apache.druid.query.aggregation.datasketches.tuple.ArrayOfDoublesSketc
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByResourcesReservationPool;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.GroupingEngine;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.segment.column.ColumnType;
@ -272,7 +273,8 @@ public class DatasketchesProjectionTest extends InitializedNullHandlingTest
TestHelper.makeJsonMapper(),
TestHelper.makeSmileMapper(),
(query, future) -> {
}
},
new GroupByStatsProvider()
);
}

View File

@ -145,18 +145,12 @@
<artifactId>easymock</artifactId>
<scope>test</scope>
</dependency>
<!-- explicitly declare mockito-core dependency to make anaylize-dependencies happy when running with Java 8 -->
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>${mockito.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-${mockito.inline.artifact}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>

View File

@ -348,7 +348,10 @@ public class MSQCompactionRunner implements CompactionRunner
private static RowSignature getRowSignature(DataSchema dataSchema)
{
RowSignature.Builder rowSignatureBuilder = RowSignature.builder();
rowSignatureBuilder.add(dataSchema.getTimestampSpec().getTimestampColumn(), ColumnType.LONG);
if (dataSchema.getDimensionsSpec().isForceSegmentSortByTime() == true) {
// If sort not forced by time, __time appears as part of dimensions in DimensionsSpec
rowSignatureBuilder.add(dataSchema.getTimestampSpec().getTimestampColumn(), ColumnType.LONG);
}
if (!isQueryGranularityEmptyOrNone(dataSchema)) {
// A virtual column for query granularity would have been added. Add corresponding column type.
rowSignatureBuilder.add(TIME_VIRTUAL_COLUMN, ColumnType.LONG);
@ -398,25 +401,31 @@ public class MSQCompactionRunner implements CompactionRunner
private static ColumnMappings getColumnMappings(DataSchema dataSchema)
{
List<ColumnMapping> columnMappings = dataSchema.getDimensionsSpec()
.getDimensions()
.stream()
.map(dim -> new ColumnMapping(
dim.getName(), dim.getName()))
.collect(Collectors.toList());
List<ColumnMapping> columnMappings = new ArrayList<>();
// For scan queries, a virtual column is created from __time if a custom query granularity is provided. For
// group-by queries, as insert needs __time, it will always be one of the dimensions. Since dimensions in groupby
// aren't allowed to have time column as the output name, we map time dimension to TIME_VIRTUAL_COLUMN in
// dimensions, and map it back to the time column here.
String timeColumn = (isGroupBy(dataSchema) || !isQueryGranularityEmptyOrNone(dataSchema))
? TIME_VIRTUAL_COLUMN
: ColumnHolder.TIME_COLUMN_NAME;
ColumnMapping timeColumnMapping = new ColumnMapping(timeColumn, ColumnHolder.TIME_COLUMN_NAME);
if (dataSchema.getDimensionsSpec().isForceSegmentSortByTime()) {
// When not sorted by time, the __time column is missing from dimensionsSpec
columnMappings.add(timeColumnMapping);
}
columnMappings.addAll(
dataSchema.getDimensionsSpec()
.getDimensions()
.stream()
.map(dim -> dim.getName().equals(ColumnHolder.TIME_COLUMN_NAME)
? timeColumnMapping
: new ColumnMapping(dim.getName(), dim.getName()))
.collect(Collectors.toList())
);
columnMappings.addAll(Arrays.stream(dataSchema.getAggregators())
.map(agg -> new ColumnMapping(agg.getName(), agg.getName()))
.collect(
Collectors.toList()));
if (isGroupBy(dataSchema) || !isQueryGranularityEmptyOrNone(dataSchema)) {
// For scan queries, a virtual column is created from __time if a custom query granularity is provided. For
// group-by queries, as insert needs __time, it will always be one of the dimensions. Since dimensions in groupby
// aren't allowed to have time column as the output name, we map time dimension to TIME_VIRTUAL_COLUMN in
// dimensions, and map it back to the time column here.
columnMappings.add(new ColumnMapping(TIME_VIRTUAL_COLUMN, ColumnHolder.TIME_COLUMN_NAME));
} else {
columnMappings.add(new ColumnMapping(ColumnHolder.TIME_COLUMN_NAME, ColumnHolder.TIME_COLUMN_NAME));
}
.collect(Collectors.toList()));
return new ColumnMappings(columnMappings);
}
@ -431,6 +440,19 @@ public class MSQCompactionRunner implements CompactionRunner
return Collections.emptyList();
}
private static Map<String, Object> buildQueryContext(
Map<String, Object> taskContext,
DataSchema dataSchema
)
{
if (dataSchema.getDimensionsSpec().isForceSegmentSortByTime()) {
return taskContext;
}
Map<String, Object> queryContext = new HashMap<>(taskContext);
queryContext.put(MultiStageQueryContext.CTX_FORCE_TIME_SORT, false);
return queryContext;
}
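The two helpers above work together: when segments are not force-sorted by time, __time is expected to appear among the dimensions, and the MSQ query context must additionally disable forced time sorting. A minimal standalone sketch of that copy-and-override pattern using only JDK types (the "forceTimeSort" key is a stand-in for MultiStageQueryContext.CTX_FORCE_TIME_SORT and should be treated as an assumption):

import java.util.HashMap;
import java.util.Map;

public final class ContextOverrideSketch
{
  public static void main(String[] args)
  {
    final Map<String, Object> taskContext = Map.of("priority", 50); // hypothetical task context
    final boolean forceSegmentSortByTime = false;

    final Map<String, Object> queryContext;
    if (forceSegmentSortByTime) {
      queryContext = taskContext;                // leave the task context untouched
    } else {
      queryContext = new HashMap<>(taskContext); // copy, then disable forced time sorting
      queryContext.put("forceTimeSort", false);  // stand-in for MultiStageQueryContext.CTX_FORCE_TIME_SORT
    }
    System.out.println(queryContext);
  }
}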
private static Query<?> buildScanQuery(
CompactionTask compactionTask,
Interval interval,
@ -447,7 +469,7 @@ public class MSQCompactionRunner implements CompactionRunner
.columnTypes(rowSignature.getColumnTypes())
.intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(interval)))
.filters(dataSchema.getTransformSpec().getFilter())
.context(compactionTask.getContext());
.context(buildQueryContext(compactionTask.getContext(), dataSchema));
if (compactionTask.getTuningConfig() != null && compactionTask.getTuningConfig().getPartitionsSpec() != null) {
List<OrderByColumnSpec> orderByColumnSpecs = getOrderBySpec(compactionTask.getTuningConfig().getPartitionsSpec());
@ -599,7 +621,7 @@ public class MSQCompactionRunner implements CompactionRunner
.setDimensions(getAggregateDimensions(dataSchema, inputColToVirtualCol))
.setAggregatorSpecs(Arrays.asList(dataSchema.getAggregators()))
.setPostAggregatorSpecs(postAggregators)
.setContext(compactionTask.getContext())
.setContext(buildQueryContext(compactionTask.getContext(), dataSchema))
.setInterval(interval);
if (compactionTask.getTuningConfig() != null && compactionTask.getTuningConfig().getPartitionsSpec() != null) {

View File

@ -60,7 +60,9 @@ import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.NestedDataColumnSchema;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.data.CompressionFactory;
@ -72,14 +74,13 @@ import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.server.coordinator.CompactionConfigValidationResult;
import org.apache.druid.sql.calcite.parser.DruidSqlInsert;
import org.hamcrest.MatcherAssert;
import org.hamcrest.Matchers;
import org.joda.time.Interval;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@ -103,10 +104,14 @@ public class MSQCompactionRunnerTest
private static final StringDimensionSchema STRING_DIMENSION = new StringDimensionSchema("string_dim", null, false);
private static final StringDimensionSchema MV_STRING_DIMENSION = new StringDimensionSchema("mv_string_dim", null, null);
private static final LongDimensionSchema LONG_DIMENSION = new LongDimensionSchema("long_dim");
private static final NestedDataColumnSchema NESTED_DIMENSION = new NestedDataColumnSchema("nested_dim", 4);
private static final AutoTypeColumnSchema AUTO_DIMENSION = new AutoTypeColumnSchema("auto_dim", null);
private static final List<DimensionSchema> DIMENSIONS = ImmutableList.of(
STRING_DIMENSION,
LONG_DIMENSION,
MV_STRING_DIMENSION
MV_STRING_DIMENSION,
NESTED_DIMENSION,
AUTO_DIMENSION
);
private static final Map<Interval, DataSchema> INTERVAL_DATASCHEMAS = ImmutableMap.of(
COMPACTION_INTERVAL,
@ -144,7 +149,6 @@ public class MSQCompactionRunnerTest
null,
Collections.emptyMap(),
null,
null,
null
);
CompactionConfigValidationResult validationResult = MSQ_COMPACTION_RUNNER.validateCompactionTask(
@ -166,7 +170,6 @@ public class MSQCompactionRunnerTest
null,
Collections.emptyMap(),
null,
null,
null
);
Assert.assertFalse(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
@ -180,7 +183,6 @@ public class MSQCompactionRunnerTest
null,
Collections.emptyMap(),
null,
null,
null
);
Assert.assertTrue(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
@ -195,7 +197,6 @@ public class MSQCompactionRunnerTest
null,
Collections.emptyMap(),
null,
null,
null
);
@ -218,7 +219,6 @@ public class MSQCompactionRunnerTest
null,
Collections.emptyMap(),
null,
null,
null
);
@ -240,7 +240,6 @@ public class MSQCompactionRunnerTest
null,
Collections.emptyMap(),
null,
null,
null
);
Assert.assertFalse(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
@ -254,7 +253,6 @@ public class MSQCompactionRunnerTest
null,
Collections.emptyMap(),
null,
null,
null
);
Assert.assertTrue(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
@ -268,7 +266,6 @@ public class MSQCompactionRunnerTest
null,
Collections.emptyMap(),
new ClientCompactionTaskGranularitySpec(null, Granularities.ALL, null),
null,
null
);
Assert.assertTrue(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
@ -282,7 +279,6 @@ public class MSQCompactionRunnerTest
null,
Collections.emptyMap(),
new ClientCompactionTaskGranularitySpec(null, null, false),
null,
AGGREGATORS.toArray(new AggregatorFactory[0])
);
Assert.assertFalse(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
@ -296,7 +292,6 @@ public class MSQCompactionRunnerTest
null,
Collections.emptyMap(),
new ClientCompactionTaskGranularitySpec(null, null, true),
null,
null
);
Assert.assertFalse(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
@ -313,7 +308,6 @@ public class MSQCompactionRunnerTest
null,
Collections.emptyMap(),
new ClientCompactionTaskGranularitySpec(null, null, true),
null,
new AggregatorFactory[]{new LongSumAggregatorFactory(outputColName, inputColName)}
);
CompactionConfigValidationResult validationResult = MSQ_COMPACTION_RUNNER.validateCompactionTask(
@ -330,13 +324,13 @@ public class MSQCompactionRunnerTest
@Test
public void testRunCompactionTasksWithEmptyTaskListFails() throws Exception
{
CompactionTask compactionTask = createCompactionTask(null, null, Collections.emptyMap(), null, null, null);
CompactionTask compactionTask = createCompactionTask(null, null, Collections.emptyMap(), null, null);
TaskStatus taskStatus = MSQ_COMPACTION_RUNNER.runCompactionTasks(compactionTask, Collections.emptyMap(), null);
Assert.assertTrue(taskStatus.isFailure());
}
@Test
public void testMSQControllerTaskSpecWithScanIsValid() throws JsonProcessingException
public void testCompactionConfigWithoutMetricsSpecProducesCorrectSpec() throws JsonProcessingException
{
DimFilter dimFilter = new SelectorDimFilter("dim1", "foo", null);
@ -345,7 +339,6 @@ public class MSQCompactionRunnerTest
dimFilter,
Collections.emptyMap(),
null,
null,
null
);
@ -357,7 +350,7 @@ public class MSQCompactionRunnerTest
.withGranularity(
new UniformGranularitySpec(
SEGMENT_GRANULARITY.getDefaultGranularity(),
null,
QUERY_GRANULARITY.getDefaultGranularity(),
false,
Collections.singletonList(COMPACTION_INTERVAL)
)
@ -375,37 +368,37 @@ public class MSQCompactionRunnerTest
MSQSpec actualMSQSpec = msqControllerTask.getQuerySpec();
Assert.assertEquals(
new MSQTuningConfig(
1,
MultiStageQueryContext.DEFAULT_ROWS_IN_MEMORY,
MAX_ROWS_PER_SEGMENT,
null,
createIndexSpec()
),
actualMSQSpec.getTuningConfig()
);
Assert.assertEquals(
new DataSourceMSQDestination(
DATA_SOURCE,
SEGMENT_GRANULARITY.getDefaultGranularity(),
null,
Collections.singletonList(COMPACTION_INTERVAL),
DIMENSIONS.stream().collect(Collectors.toMap(DimensionSchema::getName, Function.identity())),
null
),
actualMSQSpec.getDestination()
);
Assert.assertEquals(getExpectedTuningConfig(), actualMSQSpec.getTuningConfig());
Assert.assertEquals(getExpectedDestination(), actualMSQSpec.getDestination());
Assert.assertTrue(actualMSQSpec.getQuery() instanceof ScanQuery);
ScanQuery scanQuery = (ScanQuery) actualMSQSpec.getQuery();
List<String> expectedColumns = new ArrayList<>();
List<ColumnType> expectedColumnTypes = new ArrayList<>();
// Add __time since this is a time-ordered query which doesn't have __time explicitly defined in dimensionsSpec
expectedColumns.add(ColumnHolder.TIME_COLUMN_NAME);
expectedColumnTypes.add(ColumnType.LONG);
// Add TIME_VIRTUAL_COLUMN since a query granularity is specified
expectedColumns.add(MSQCompactionRunner.TIME_VIRTUAL_COLUMN);
expectedColumnTypes.add(ColumnType.LONG);
expectedColumns.addAll(DIMENSIONS.stream().map(DimensionSchema::getName).collect(Collectors.toList()));
expectedColumnTypes.addAll(DIMENSIONS.stream().map(DimensionSchema::getColumnType).collect(Collectors.toList()));
Assert.assertEquals(expectedColumns, scanQuery.getColumns());
Assert.assertEquals(expectedColumnTypes, scanQuery.getColumnTypes());
Assert.assertEquals(dimFilter, scanQuery.getFilter());
Assert.assertEquals(
JSON_MAPPER.writeValueAsString(SEGMENT_GRANULARITY.toString()),
msqControllerTask.getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY)
);
Assert.assertNull(msqControllerTask.getContext().get(DruidSqlInsert.SQL_INSERT_QUERY_GRANULARITY));
Assert.assertEquals(
JSON_MAPPER.writeValueAsString(QUERY_GRANULARITY.toString()),
msqControllerTask.getContext().get(DruidSqlInsert.SQL_INSERT_QUERY_GRANULARITY)
);
Assert.assertEquals(WorkerAssignmentStrategy.MAX, actualMSQSpec.getAssignmentStrategy());
Assert.assertEquals(
PARTITION_DIMENSIONS.stream().map(OrderBy::ascending).collect(Collectors.toList()),
@ -414,7 +407,60 @@ public class MSQCompactionRunnerTest
}
@Test
public void testMSQControllerTaskSpecWithAggregatorsIsValid() throws JsonProcessingException
public void testCompactionConfigWithSortOnNonTimeDimensionsProducesCorrectSpec() throws JsonProcessingException
{
List<DimensionSchema> nonTimeSortedDimensions = ImmutableList.of(
STRING_DIMENSION,
new LongDimensionSchema(ColumnHolder.TIME_COLUMN_NAME),
LONG_DIMENSION
);
CompactionTask taskCreatedWithTransformSpec = createCompactionTask(
new DynamicPartitionsSpec(TARGET_ROWS_PER_SEGMENT, null),
null,
Collections.emptyMap(),
null,
null
);
// Set forceSegmentSortByTime=false to enable non-time order
DimensionsSpec dimensionsSpec = DimensionsSpec.builder()
.setDimensions(nonTimeSortedDimensions)
.setForceSegmentSortByTime(false)
.build();
DataSchema dataSchema =
DataSchema.builder()
.withDataSource(DATA_SOURCE)
.withTimestamp(new TimestampSpec(TIMESTAMP_COLUMN, null, null))
.withDimensions(dimensionsSpec)
.withGranularity(
new UniformGranularitySpec(
SEGMENT_GRANULARITY.getDefaultGranularity(),
null,
false,
Collections.singletonList(COMPACTION_INTERVAL)
)
)
.build();
List<MSQControllerTask> msqControllerTasks = MSQ_COMPACTION_RUNNER.createMsqControllerTasks(
taskCreatedWithTransformSpec,
Collections.singletonMap(COMPACTION_INTERVAL, dataSchema)
);
MSQSpec actualMSQSpec = Iterables.getOnlyElement(msqControllerTasks).getQuerySpec();
Assert.assertTrue(actualMSQSpec.getQuery() instanceof ScanQuery);
ScanQuery scanQuery = (ScanQuery) actualMSQSpec.getQuery();
// Dimensions should already list __time and the order should remain intact
Assert.assertEquals(
nonTimeSortedDimensions.stream().map(DimensionSchema::getName).collect(Collectors.toList()),
scanQuery.getColumns()
);
}
@Test
public void testCompactionConfigWithMetricsSpecProducesCorrectSpec() throws JsonProcessingException
{
DimFilter dimFilter = new SelectorDimFilter("dim1", "foo", null);
@ -423,7 +469,6 @@ public class MSQCompactionRunnerTest
dimFilter,
Collections.emptyMap(),
null,
null,
null
);
@ -444,7 +489,6 @@ public class MSQCompactionRunnerTest
multiValuedDimensions
);
List<MSQControllerTask> msqControllerTasks = MSQ_COMPACTION_RUNNER.createMsqControllerTasks(
taskCreatedWithTransformSpec,
Collections.singletonMap(COMPACTION_INTERVAL, dataSchema)
@ -454,27 +498,8 @@ public class MSQCompactionRunnerTest
MSQSpec actualMSQSpec = msqControllerTask.getQuerySpec();
Assert.assertEquals(
new MSQTuningConfig(
1,
MultiStageQueryContext.DEFAULT_ROWS_IN_MEMORY,
MAX_ROWS_PER_SEGMENT,
null,
createIndexSpec()
),
actualMSQSpec.getTuningConfig()
);
Assert.assertEquals(
new DataSourceMSQDestination(
DATA_SOURCE,
SEGMENT_GRANULARITY.getDefaultGranularity(),
null,
Collections.singletonList(COMPACTION_INTERVAL),
DIMENSIONS.stream().collect(Collectors.toMap(DimensionSchema::getName, Function.identity())),
null
),
actualMSQSpec.getDestination()
);
Assert.assertEquals(getExpectedTuningConfig(), actualMSQSpec.getTuningConfig());
Assert.assertEquals(getExpectedDestination(), actualMSQSpec.getDestination());
Assert.assertTrue(actualMSQSpec.getQuery() instanceof GroupByQuery);
GroupByQuery groupByQuery = (GroupByQuery) actualMSQSpec.getQuery();
@ -490,30 +515,32 @@ public class MSQCompactionRunnerTest
);
Assert.assertEquals(WorkerAssignmentStrategy.MAX, actualMSQSpec.getAssignmentStrategy());
// Since only MV_STRING_DIMENSION is indicated to be MVD by the CombinedSchema, conversion to array should happen
// only for that column.
List<DimensionSpec> expectedDimensionSpec = DIMENSIONS.stream()
.filter(dim -> !MV_STRING_DIMENSION.getName()
.equals(dim.getName()))
.map(dim -> new DefaultDimensionSpec(
dim.getName(),
dim.getName(),
dim.getColumnType()
))
.collect(
Collectors.toList());
List<DimensionSpec> expectedDimensionSpec = new ArrayList<>();
expectedDimensionSpec.add(
new DefaultDimensionSpec(MSQCompactionRunner.TIME_VIRTUAL_COLUMN,
MSQCompactionRunner.TIME_VIRTUAL_COLUMN,
ColumnType.LONG)
new DefaultDimensionSpec(
MSQCompactionRunner.TIME_VIRTUAL_COLUMN,
MSQCompactionRunner.TIME_VIRTUAL_COLUMN,
ColumnType.LONG
)
);
String mvToArrayStringDim = MSQCompactionRunner.ARRAY_VIRTUAL_COLUMN_PREFIX + MV_STRING_DIMENSION.getName();
expectedDimensionSpec.add(new DefaultDimensionSpec(mvToArrayStringDim, mvToArrayStringDim, ColumnType.STRING_ARRAY));
MatcherAssert.assertThat(
expectedDimensionSpec,
Matchers.containsInAnyOrder(groupByQuery.getDimensions().toArray(new DimensionSpec[0]))
);
// Since only MV_STRING_DIMENSION is indicated to be MVD by the CombinedSchema, conversion to array should happen
// only for that column.
expectedDimensionSpec.addAll(DIMENSIONS.stream()
.map(dim ->
MV_STRING_DIMENSION.getName().equals(dim.getName())
? new DefaultDimensionSpec(
mvToArrayStringDim,
mvToArrayStringDim,
ColumnType.STRING_ARRAY
)
: new DefaultDimensionSpec(
dim.getName(),
dim.getName(),
dim.getColumnType()
))
.collect(Collectors.toList()));
Assert.assertEquals(expectedDimensionSpec, groupByQuery.getDimensions());
}
private CompactionTask createCompactionTask(
@ -521,7 +548,6 @@ public class MSQCompactionRunnerTest
@Nullable DimFilter dimFilter,
Map<String, Object> contextParams,
@Nullable ClientCompactionTaskGranularitySpec granularitySpec,
@Nullable List<DimensionSchema> dimensionSchemas,
@Nullable AggregatorFactory[] metricsSpec
)
{
@ -545,7 +571,7 @@ public class MSQCompactionRunnerTest
))
.transformSpec(transformSpec)
.granularitySpec(granularitySpec)
.dimensionsSpec(new DimensionsSpec(dimensionSchemas))
.dimensionsSpec(new DimensionsSpec(null))
.metricsSpec(metricsSpec)
.compactionRunner(MSQ_COMPACTION_RUNNER)
.context(context);
@ -580,4 +606,27 @@ public class MSQCompactionRunnerTest
.withLongEncoding(CompressionFactory.LongEncodingStrategy.LONGS)
.build();
}
private static DataSourceMSQDestination getExpectedDestination()
{
return new DataSourceMSQDestination(
DATA_SOURCE,
SEGMENT_GRANULARITY.getDefaultGranularity(),
null,
Collections.singletonList(COMPACTION_INTERVAL),
DIMENSIONS.stream().collect(Collectors.toMap(DimensionSchema::getName, Function.identity())),
null
);
}
private static MSQTuningConfig getExpectedTuningConfig()
{
return new MSQTuningConfig(
1,
MultiStageQueryContext.DEFAULT_ROWS_IN_MEMORY,
MAX_ROWS_PER_SEGMENT,
null,
createIndexSpec()
);
}
}

View File

@ -62,13 +62,12 @@ public class CalciteUnionQueryMSQTest extends CalciteUnionQueryTest
*/
@Test
@Override
public void testUnionIsUnplannable()
public void testUnionDifferentColumnOrder()
{
assertQueryIsUnplannable(
"SELECT dim2, dim1, m1 FROM foo2 UNION SELECT dim1, dim2, m1 FROM foo",
"SQL requires union between two tables and column names queried for each table are different Left: [dim2, dim1, m1], Right: [dim1, dim2, m1]."
);
}
@Disabled("Ignored till MSQ can plan UNION ALL with any operand")

View File

@ -511,7 +511,6 @@ public class MSQTestBase extends BaseCalciteQueryTest
binder -> binder.bind(SegmentManager.class).toInstance(EasyMock.createMock(SegmentManager.class)),
new JoinableFactoryModule(),
new IndexingServiceTuningConfigModule(),
new MSQIndexingModule(),
Modules.override(new MSQSqlModule()).with(
binder -> {
// Our Guice configuration currently requires bindings to exist even if they aren't ever used, the
@ -540,6 +539,7 @@ public class MSQTestBase extends BaseCalciteQueryTest
objectMapper = setupObjectMapper(injector);
objectMapper.registerModules(new StorageConnectorModule().getJacksonModules());
objectMapper.registerModules(new MSQIndexingModule().getJacksonModules());
objectMapper.registerModules(sqlModule.getJacksonModules());
objectMapper.registerModules(BuiltInTypesModule.getJacksonModulesList());
@ -697,7 +697,6 @@ public class MSQTestBase extends BaseCalciteQueryTest
break;
default:
throw new ISE("Cannot query segment %s in test runner", segmentId);
}
Segment segment = new Segment()
{

View File

@ -17,6 +17,6 @@
~ under the License.
-->
This module contains a simple implementation of [SslContext](http://docs.oracle.com/javase/8/docs/api/javax/net/ssl/SSLContext.html)
This module contains a simple implementation of [SslContext](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/javax/net/ssl/SSLContext.html)
that will be injected to be used with HttpClient that Druid nodes use internally to communicate with each other.
More details [here](https://druid.apache.org/docs/latest/development/extensions-core/simple-client-sslcontext.html).

View File

@ -145,18 +145,12 @@
<artifactId>equalsverifier</artifactId>
<scope>test</scope>
</dependency>
<!-- explicitly declare mockito-core dependency to make anaylize-dependencies happy when running with Java 8 -->
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>${mockito.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-${mockito.inline.artifact}</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<profiles>

View File

@ -293,7 +293,7 @@ public class IndexGeneratorJob implements Jobby
AggregatorFactory[] aggs,
HadoopDruidIndexerConfig config,
@Nullable Iterable<String> oldDimOrder,
Map<String, ColumnFormat> oldCapabilities
@Nullable Map<String, ColumnFormat> oldCapabilities
)
{
final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();

View File

@ -91,6 +91,7 @@
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-multibindings</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>javax.ws.rs</groupId>

View File

@ -25,7 +25,6 @@ import org.apache.druid.indexing.common.LockGranularity;
import org.apache.druid.indexing.common.task.IndexTaskUtils;
import org.apache.druid.indexing.common.task.Task;
import org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator;
import org.apache.druid.indexing.overlord.Segments;
import org.apache.druid.indexing.overlord.TaskLockbox;
import org.apache.druid.indexing.overlord.config.TaskLockConfig;
import org.apache.druid.java.util.common.ISE;
@ -41,6 +40,7 @@ import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.query.DruidMetrics;
import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.Partitions;
import org.joda.time.Interval;
import java.util.ArrayList;
@ -87,6 +87,8 @@ public class SegmentAllocationQueue
private final ConcurrentHashMap<AllocateRequestKey, AllocateRequestBatch> keyToBatch = new ConcurrentHashMap<>();
private final BlockingDeque<AllocateRequestBatch> processingQueue = new LinkedBlockingDeque<>(MAX_QUEUE_SIZE);
private final boolean reduceMetadataIO;
@Inject
public SegmentAllocationQueue(
TaskLockbox taskLockbox,
@ -100,6 +102,7 @@ public class SegmentAllocationQueue
this.taskLockbox = taskLockbox;
this.metadataStorage = metadataStorage;
this.maxWaitTimeMillis = taskLockConfig.getBatchAllocationWaitTime();
this.reduceMetadataIO = taskLockConfig.isBatchAllocationReduceMetadataIO();
this.executor = taskLockConfig.isBatchSegmentAllocation()
? executorFactory.create(1, "SegmentAllocQueue-%s") : null;
@ -380,13 +383,11 @@ public class SegmentAllocationQueue
private Set<DataSegment> retrieveUsedSegments(AllocateRequestKey key)
{
return new HashSet<>(
metadataStorage.retrieveUsedSegmentsForInterval(
key.dataSource,
key.preferredAllocationInterval,
Segments.ONLY_VISIBLE
)
);
return metadataStorage.getSegmentTimelineForAllocation(
key.dataSource,
key.preferredAllocationInterval,
(key.lockGranularity == LockGranularity.TIME_CHUNK) && reduceMetadataIO
).findNonOvershadowedObjectsInInterval(Intervals.ETERNITY, Partitions.ONLY_COMPLETE);
}
private int allocateSegmentsForBatch(AllocateRequestBatch requestBatch, Set<DataSegment> usedSegments)
@ -493,7 +494,8 @@ public class SegmentAllocationQueue
requestKey.dataSource,
tryInterval,
requestKey.skipSegmentLineageCheck,
requestKey.lockGranularity
requestKey.lockGranularity,
reduceMetadataIO
);
int successfulRequests = 0;

View File

@ -1515,7 +1515,7 @@ public class RemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer
boolean shouldRunPendingTasks = false;
// must be synchronized while iterating:
// https://docs.oracle.com/javase/8/docs/api/java/util/Collections.html#synchronizedSet-java.util.Set-
// https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Collections.html#synchronizedSet(java.util.Set)
synchronized (blackListedWorkers) {
for (Iterator<ZkWorker> iterator = blackListedWorkers.iterator(); iterator.hasNext(); ) {
ZkWorker zkWorker = iterator.next();
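As the updated javadoc link above notes, a Collections.synchronizedSet view is only safe to iterate while holding the set's own monitor, which is exactly what the surrounding synchronized block does. A self-contained, JDK-only sketch of the pattern (names are hypothetical):

import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

public final class SynchronizedSetIterationSketch
{
  public static void main(String[] args)
  {
    final Set<String> blacklisted = Collections.synchronizedSet(new HashSet<>());
    blacklisted.add("worker-1");
    // Iterating a synchronizedSet view must be guarded by the set itself.
    synchronized (blacklisted) {
      for (Iterator<String> it = blacklisted.iterator(); it.hasNext(); ) {
        if (it.next().startsWith("worker")) {
          it.remove(); // structural changes go through the iterator while the lock is held
        }
      }
    }
  }
}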

View File

@ -466,6 +466,8 @@ public class TaskLockbox
* @param skipSegmentLineageCheck Whether lineage check is to be skipped
* (this is true for streaming ingestion)
* @param lockGranularity Granularity of task lock
* @param reduceMetadataIO Whether to skip fetching payloads for all used
* segments and rely on their IDs instead.
* @return List of allocation results in the same order as the requests.
*/
public List<SegmentAllocateResult> allocateSegments(
@ -473,7 +475,8 @@ public class TaskLockbox
String dataSource,
Interval interval,
boolean skipSegmentLineageCheck,
LockGranularity lockGranularity
LockGranularity lockGranularity,
boolean reduceMetadataIO
)
{
log.info("Allocating [%d] segments for datasource [%s], interval [%s]", requests.size(), dataSource, interval);
@ -487,9 +490,15 @@ public class TaskLockbox
if (isTimeChunkLock) {
// For time-chunk locking, segment must be allocated only after acquiring the lock
holderList.getPending().forEach(holder -> acquireTaskLock(holder, true));
allocateSegmentIds(dataSource, interval, skipSegmentLineageCheck, holderList.getPending());
allocateSegmentIds(
dataSource,
interval,
skipSegmentLineageCheck,
holderList.getPending(),
reduceMetadataIO
);
} else {
allocateSegmentIds(dataSource, interval, skipSegmentLineageCheck, holderList.getPending());
allocateSegmentIds(dataSource, interval, skipSegmentLineageCheck, holderList.getPending(), false);
holderList.getPending().forEach(holder -> acquireTaskLock(holder, false));
}
holderList.getPending().forEach(SegmentAllocationHolder::markSucceeded);
@ -702,12 +711,12 @@ public class TaskLockbox
* for the given requests. Updates the holder with the allocated segment if
* the allocation succeeds, otherwise marks it as failed.
*/
@VisibleForTesting
void allocateSegmentIds(
private void allocateSegmentIds(
String dataSource,
Interval interval,
boolean skipSegmentLineageCheck,
Collection<SegmentAllocationHolder> holders
Collection<SegmentAllocationHolder> holders,
boolean reduceMetadataIO
)
{
if (holders.isEmpty()) {
@ -724,7 +733,8 @@ public class TaskLockbox
dataSource,
interval,
skipSegmentLineageCheck,
createRequests
createRequests,
reduceMetadataIO
);
for (SegmentAllocationHolder holder : holders) {

View File

@ -36,6 +36,9 @@ public class TaskLockConfig
@JsonProperty
private long batchAllocationWaitTime = 0L;
@JsonProperty
private boolean batchAllocationReduceMetadataIO = true;
public boolean isForceTimeChunkLock()
{
return forceTimeChunkLock;
@ -50,4 +53,10 @@ public class TaskLockConfig
{
return batchAllocationWaitTime;
}
public boolean isBatchAllocationReduceMetadataIO()
{
return batchAllocationReduceMetadataIO;
}
}
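The new flag defaults to true, so batched time-chunk allocations skip fetching full segment payloads unless an operator disables it. A small sketch of observing the default and overriding it via Jackson (this assumes Jackson's usual field injection for the @JsonProperty members; the druid.indexer.tasklock.* runtime-property prefix is the conventional one and is an assumption here):

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.indexing.overlord.config.TaskLockConfig;

public final class TaskLockConfigSketch
{
  public static void main(String[] args) throws Exception
  {
    System.out.println(new TaskLockConfig().isBatchAllocationReduceMetadataIO()); // true by default

    // What an operator override such as druid.indexer.tasklock.batchAllocationReduceMetadataIO=false
    // (assumed prefix) would deserialize to:
    final TaskLockConfig overridden = new ObjectMapper().readValue(
        "{\"batchAllocationReduceMetadataIO\": false}",
        TaskLockConfig.class
    );
    System.out.println(overridden.isBatchAllocationReduceMetadataIO()); // false
  }
}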

View File

@ -93,21 +93,28 @@ public class SegmentAllocateActionTest
private SegmentAllocationQueue allocationQueue;
@Parameterized.Parameters(name = "granularity = {0}, useBatch = {1}")
@Parameterized.Parameters(name = "granularity = {0}, useBatch = {1}, skipSegmentPayloadFetchForAllocation = {2}")
public static Iterable<Object[]> constructorFeeder()
{
return ImmutableList.of(
new Object[]{LockGranularity.SEGMENT, true},
new Object[]{LockGranularity.SEGMENT, false},
new Object[]{LockGranularity.TIME_CHUNK, true},
new Object[]{LockGranularity.TIME_CHUNK, false}
new Object[]{LockGranularity.SEGMENT, true, true},
new Object[]{LockGranularity.SEGMENT, true, false},
new Object[]{LockGranularity.SEGMENT, false, false},
new Object[]{LockGranularity.TIME_CHUNK, true, true},
new Object[]{LockGranularity.TIME_CHUNK, true, false},
new Object[]{LockGranularity.TIME_CHUNK, false, false}
);
}
public SegmentAllocateActionTest(LockGranularity lockGranularity, boolean useBatch)
public SegmentAllocateActionTest(
LockGranularity lockGranularity,
boolean useBatch,
boolean skipSegmentPayloadFetchForAllocation
)
{
this.lockGranularity = lockGranularity;
this.useBatch = useBatch;
this.taskActionTestKit.setSkipSegmentPayloadFetchForAllocation(skipSegmentPayloadFetchForAllocation);
}
@Before

View File

@ -36,6 +36,8 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.util.ArrayList;
import java.util.List;
@ -44,6 +46,7 @@ import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
@RunWith(Parameterized.class)
public class SegmentAllocationQueueTest
{
@Rule
@ -54,6 +57,19 @@ public class SegmentAllocationQueueTest
private StubServiceEmitter emitter;
private BlockingExecutorService executor;
private final boolean reduceMetadataIO;
@Parameterized.Parameters(name = "reduceMetadataIO = {0}")
public static Object[][] getTestParameters()
{
return new Object[][]{{true}, {false}};
}
public SegmentAllocationQueueTest(boolean reduceMetadataIO)
{
this.reduceMetadataIO = reduceMetadataIO;
}
@Before
public void setUp()
{
@ -73,6 +89,12 @@ public class SegmentAllocationQueueTest
{
return 0;
}
@Override
public boolean isBatchAllocationReduceMetadataIO()
{
return reduceMetadataIO;
}
};
allocationQueue = new SegmentAllocationQueue(

View File

@ -58,6 +58,8 @@ public class TaskActionTestKit extends ExternalResource
private SegmentSchemaManager segmentSchemaManager;
private SegmentSchemaCache segmentSchemaCache;
private boolean skipSegmentPayloadFetchForAllocation = new TaskLockConfig().isBatchAllocationReduceMetadataIO();
public TaskLockbox getTaskLockbox()
{
return taskLockbox;
@ -78,6 +80,11 @@ public class TaskActionTestKit extends ExternalResource
return taskActionToolbox;
}
public void setSkipSegmentPayloadFetchForAllocation(boolean skipSegmentPayloadFetchForAllocation)
{
this.skipSegmentPayloadFetchForAllocation = skipSegmentPayloadFetchForAllocation;
}
@Override
public void before()
{
@ -126,6 +133,12 @@ public class TaskActionTestKit extends ExternalResource
{
return 10L;
}
@Override
public boolean isBatchAllocationReduceMetadataIO()
{
return skipSegmentPayloadFetchForAllocation;
}
};
taskActionToolbox = new TaskActionToolbox(

View File

@ -35,6 +35,7 @@ import org.apache.druid.metadata.ReplaceTaskLock;
import org.apache.druid.segment.SegmentSchemaMapping;
import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.SegmentTimeline;
import org.apache.druid.timeline.partition.PartialShardSpec;
import org.joda.time.DateTime;
import org.joda.time.Interval;
@ -168,7 +169,8 @@ public class TestIndexerMetadataStorageCoordinator implements IndexerMetadataSto
String dataSource,
Interval interval,
boolean skipSegmentLineageCheck,
List<SegmentCreateRequest> requests
List<SegmentCreateRequest> requests,
boolean isTimeChunk
)
{
return Collections.emptyMap();
@ -332,6 +334,20 @@ public class TestIndexerMetadataStorageCoordinator implements IndexerMetadataSto
return Collections.emptyMap();
}
@Override
public SegmentTimeline getSegmentTimelineForAllocation(
String dataSource,
Interval interval,
boolean skipSegmentPayloadFetchForAllocation
)
{
return SegmentTimeline.forSegments(retrieveUsedSegmentsForIntervals(
dataSource,
Collections.singletonList(interval),
Segments.INCLUDING_OVERSHADOWED
));
}
public Set<DataSegment> getPublished()
{
return ImmutableSet.copyOf(published);

View File

@ -86,6 +86,7 @@
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-multibindings</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
@ -478,6 +479,9 @@
<plugin>
<artifactId>maven-failsafe-plugin</artifactId>
<version>3.0.0-M7</version>
<configuration>
<argLine>--add-opens java.base/java.lang=ALL-UNNAMED</argLine>
</configuration>
<dependencies>
<!-- Required to force Failsafe to use JUnit instead of TestNG.
junit47 is required to use test categories. -->

View File

@ -99,7 +99,7 @@ public class ITMultiStageQueryWorkerFaultTolerance
+ " regionIsoCode\n"
+ "FROM TABLE(\n"
+ " EXTERN(\n"
+ " '{\"type\":\"local\",\"files\":[\"/resources/data/batch_index/json/wikipedia_index_data1.json\",\"/resources/data/batch_index/json/wikipedia_index_data1.json\"]}',\n"
+ " '{\"type\":\"local\",\"files\":[\"/resources/data/batch_index/json/wikipedia_index_data1.json\",\"/resources/data/batch_index/json/wikipedia_index_data1.json\",\"/resources/data/batch_index/json/wikipedia_index_data1.json\",\"/resources/data/batch_index/json/wikipedia_index_data1.json\"]}',\n"
+ " '{\"type\":\"json\"}',\n"
+ " '[{\"type\":\"string\",\"name\":\"timestamp\"},{\"type\":\"string\",\"name\":\"isRobot\"},{\"type\":\"string\",\"name\":\"diffUrl\"},{\"type\":\"long\",\"name\":\"added\"},{\"type\":\"string\",\"name\":\"countryIsoCode\"},{\"type\":\"string\",\"name\":\"regionName\"},{\"type\":\"string\",\"name\":\"channel\"},{\"type\":\"string\",\"name\":\"flags\"},{\"type\":\"long\",\"name\":\"delta\"},{\"type\":\"string\",\"name\":\"isUnpatrolled\"},{\"type\":\"string\",\"name\":\"isNew\"},{\"type\":\"double\",\"name\":\"deltaBucket\"},{\"type\":\"string\",\"name\":\"isMinor\"},{\"type\":\"string\",\"name\":\"isAnonymous\"},{\"type\":\"long\",\"name\":\"deleted\"},{\"type\":\"string\",\"name\":\"cityName\"},{\"type\":\"long\",\"name\":\"metroCode\"},{\"type\":\"string\",\"name\":\"namespace\"},{\"type\":\"string\",\"name\":\"comment\"},{\"type\":\"string\",\"name\":\"page\"},{\"type\":\"long\",\"name\":\"commentLength\"},{\"type\":\"string\",\"name\":\"countryName\"},{\"type\":\"string\",\"name\":\"user\"},{\"type\":\"string\",\"name\":\"regionIsoCode\"}]'\n"
+ " )\n"
@ -139,7 +139,6 @@ public class ITMultiStageQueryWorkerFaultTolerance
private void killTaskAbruptly(String taskIdToKill)
{
String command = "jps -mlv | grep -i peon | grep -i " + taskIdToKill + " |awk '{print $1}'";
ITRetryUtil.retryUntil(() -> {
@ -158,21 +157,17 @@ public class ITMultiStageQueryWorkerFaultTolerance
}
String pidToKill = stdOut.lhs.trim();
if (pidToKill.length() != 0) {
LOG.info("Found PID to kill %s", pidToKill);
// kill worker after 5 seconds
Thread.sleep(5000);
LOG.info("Killing pid %s", pidToKill);
druidClusterAdminClient.runCommandInMiddleManagerContainer(
final Pair<String, String> killResult = druidClusterAdminClient.runCommandInMiddleManagerContainer(
"/bin/bash",
"-c",
"kill -9 " + pidToKill
);
LOG.info(StringUtils.format("Kill command stdout: %s, stderr: %s", killResult.lhs, killResult.rhs));
return true;
} else {
return false;
}
}, true, 6000, 50, StringUtils.format("Figuring out PID for task[%s] to kill abruptly", taskIdToKill));
}, true, 2000, 100, StringUtils.format("Figuring out PID for task[%s] to kill abruptly", taskIdToKill));
}
}

View File

@ -2,6 +2,22 @@
{
"query": "SELECT __time, isRobot, added, delta, deleted, namespace FROM %%DATASOURCE%%",
"expectedResults": [
{
"__time": 1377910953000,
"isRobot": null,
"added": 57,
"delta": -143,
"deleted": 200,
"namespace": "article"
},
{
"__time": 1377910953000,
"isRobot": null,
"added": 57,
"delta": -143,
"deleted": 200,
"namespace": "article"
},
{
"__time": 1377910953000,
"isRobot": null,
@ -34,6 +50,38 @@
"deleted": 129,
"namespace": "wikipedia"
},
{
"__time": 1377919965000,
"isRobot": null,
"added": 459,
"delta": 330,
"deleted": 129,
"namespace": "wikipedia"
},
{
"__time": 1377919965000,
"isRobot": null,
"added": 459,
"delta": 330,
"deleted": 129,
"namespace": "wikipedia"
},
{
"__time": 1377933081000,
"isRobot": null,
"added": 123,
"delta": 111,
"deleted": 12,
"namespace": "article"
},
{
"__time": 1377933081000,
"isRobot": null,
"added": 123,
"delta": 111,
"deleted": 12,
"namespace": "article"
},
{
"__time": 1377933081000,
"isRobot": null,

View File

@ -28,7 +28,7 @@
# This Dockerfile prefers to use the COPY command over ADD.
# See: https://phoenixnap.com/kb/docker-add-vs-copy
ARG JDK_VERSION=8-slim-buster
ARG JDK_VERSION=17-slim-buster
# The FROM image provides Java on top of Debian, and
# thus provides bash, apt-get, etc.

View File

@ -88,8 +88,7 @@ if your test has the annotation: `@Test(groups = TestNGGroup.BATCH_INDEX)` then
* Add `-pl :druid-integration-tests` when running integration tests for the second time or later without changing
the code of core modules in between to skip up-to-date checks for the whole module dependency tree.
* Integration tests can also be run with either Java 8 or Java 11 by adding `-Djvm.runtime=#` to the `mvn` command, where `#`
can either be 8 or 11.
* Integration tests can also be run with a specific Java version by adding `-Djvm.runtime=#` to the `mvn` command (where `#` can be 11, for example).
* Druid's configuration (using Docker) can be overridden by providing `-Doverride.config.path=<PATH_TO_FILE>`.
The file must contain one property per line, the key must start with `druid_` and the format should be snake case.

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG JDK_VERSION=8-slim-buster
ARG JDK_VERSION=17-slim-buster
FROM openjdk:$JDK_VERSION as druidbase
# Bundle everything into one script so cleanup can reduce image size.

View File

@ -281,6 +281,7 @@
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-multibindings</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
@ -371,7 +372,6 @@
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-servlet</artifactId>
<version>${guice.version}</version>
</dependency>
<dependency>
<groupId>io.confluent</groupId>
@ -772,6 +772,7 @@
</property>
</properties>
<argLine>
${jdk.strong.encapsulation.argLine}
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.test.config.type=configFile

View File

@ -26,7 +26,7 @@ then
else
echo "\$DRUID_INTEGRATION_TEST_JVM_RUNTIME is set with value ${DRUID_INTEGRATION_TEST_JVM_RUNTIME}"
case "${DRUID_INTEGRATION_TEST_JVM_RUNTIME}" in
8 | 11 | 17 | 21)
11 | 17 | 21)
echo "Build druid-cluster with Java $DRUID_INTEGRATION_TEST_JVM_RUNTIME"
docker build -t druid/cluster \
--build-arg JDK_VERSION=$DRUID_INTEGRATION_TEST_JVM_RUNTIME-slim-buster \

View File

@ -371,7 +371,7 @@ name: Guice
license_category: binary
module: java-core
license_name: Apache License version 2.0
version: 4.1.0
version: 4.2.2
libraries:
- com.google.inject: guice
- com.google.inject.extensions: guice-multibindings
@ -3165,7 +3165,7 @@ libraries:
---
name: Apache Kafka
version: 3.6.1
version: 3.9.0
license_category: binary
module: extensions/druid-kafka-indexing-service
license_name: Apache License version 2.0
@ -3174,7 +3174,7 @@ libraries:
notices:
- kafka-clients: |
Apache Kafka
Copyright 2023 The Apache Software Foundation.
Copyright 2024 The Apache Software Foundation.
This product includes software developed at
The Apache Software Foundation (https://www.apache.org/).
@ -5125,15 +5125,6 @@ version: 5.2.5
---
name: "@druid-toolkit/query"
license_category: binary
module: web-console
license_name: Apache License version 2.0
copyright: Imply Data
version: 0.22.23
---
name: "@emotion/cache"
license_category: binary
module: web-console
@ -5224,6 +5215,16 @@ license_file_path: licenses/bin/@emotion-weak-memoize.MIT
---
name: "@flatten-js/interval-tree"
license_category: binary
module: web-console
license_name: MIT License
copyright: Alex Bol
version: 1.1.3
license_file_path: licenses/bin/@flatten-js-interval-tree.MIT
---
name: "@fontsource/open-sans"
license_category: binary
module: web-console
@ -5234,6 +5235,15 @@ license_file_path: licenses/bin/@fontsource-open-sans.OFL
---
name: "@internationalized/date"
license_category: binary
module: web-console
license_name: Apache License version 2.0
copyright: Adobe
version: 3.5.6
---
name: "@popperjs/core"
license_category: binary
module: web-console
@ -5244,6 +5254,15 @@ license_file_path: licenses/bin/@popperjs-core.MIT
---
name: "@swc/helpers"
license_category: binary
module: web-console
license_name: Apache License version 2.0
copyright: 강동윤
version: 0.5.13
---
name: "@types/parse-json"
license_category: binary
module: web-console
@ -5404,15 +5423,6 @@ license_file_path: licenses/bin/change-case.MIT
---
name: "chronoshift"
license_category: binary
module: web-console
license_name: Apache License version 2.0
copyright: Vadim Ogievetsky
version: 0.10.0
---
name: "classnames"
license_category: binary
module: web-console
@ -5702,6 +5712,15 @@ license_file_path: licenses/bin/dot-case.MIT
---
name: "druid-query-toolkit"
license_category: binary
module: web-console
license_name: Apache License version 2.0
copyright: Imply Data
version: 1.0.0
---
name: "echarts"
license_category: binary
module: web-console
@ -5801,16 +5820,6 @@ license_file_path: licenses/bin/has-flag.MIT
---
name: "has-own-prop"
license_category: binary
module: web-console
license_name: MIT License
copyright: Sindre Sorhus
version: 2.0.0
license_file_path: licenses/bin/has-own-prop.MIT
---
name: "hasown"
license_category: binary
module: web-console
@ -5871,15 +5880,6 @@ license_file_path: licenses/bin/iconv-lite.MIT
---
name: "immutable-class"
license_category: binary
module: web-console
license_name: Apache License version 2.0
copyright: Vadim Ogievetsky
version: 0.11.2
---
name: "import-fresh"
license_category: binary
module: web-console
@ -6060,26 +6060,6 @@ license_file_path: licenses/bin/mime-types.MIT
---
name: "moment-timezone"
license_category: binary
module: web-console
license_name: MIT License
copyright: Tim Wood
version: 0.5.43
license_file_path: licenses/bin/moment-timezone.MIT
---
name: "moment"
license_category: binary
module: web-console
license_name: MIT License
copyright: Iskren Ivov Chernev
version: 2.29.4
license_file_path: licenses/bin/moment.MIT
---
name: "no-case"
license_category: binary
module: web-console

pom.xml
View File

@ -70,13 +70,12 @@
</scm>
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<java.version>8</java.version>
<java.version>11</java.version>
<maven.compiler.release>${java.version}</maven.compiler.release>
<project.build.resourceEncoding>UTF-8</project.build.resourceEncoding>
<aether.version>0.9.0.M2</aether.version>
<apache.curator.version>5.5.0</apache.curator.version>
<apache.kafka.version>3.6.1</apache.kafka.version>
<apache.kafka.version>3.9.0</apache.kafka.version>
<!-- when updating apache ranger, verify the usage of aws-bundle-sdk vs aws-logs-sdk
and update as needed in extensions-core/druid-ranger-security/pm.xml -->
<apache.ranger.version>2.4.0</apache.ranger.version>
@ -96,7 +95,7 @@
<errorprone.version>2.35.1</errorprone.version>
<fastutil.version>8.5.4</fastutil.version>
<guava.version>32.0.1-jre</guava.version>
<guice.version>4.1.0</guice.version>
<guice.version>4.2.2</guice.version>
<hamcrest.version>1.3</hamcrest.version>
<jetty.version>9.4.56.v20240826</jetty.version>
<jersey.version>1.19.4</jersey.version>
@ -115,10 +114,6 @@
<jna-platform.version>5.13.0</jna-platform.version>
<hadoop.compile.version>3.3.6</hadoop.compile.version>
<mockito.version>5.14.2</mockito.version>
<!-- mockito-inline artifact was removed in mockito 5.3 (mockito 5.x is required for Java >17),
however it is required in some cases when running against mockito 4.x (mockito 4.x is required for Java <11.
We use the following property to pick the proper artifact based on Java version (see pre-java-11 profile) -->
<mockito.inline.artifact>core</mockito.inline.artifact>
<aws.sdk.version>1.12.638</aws.sdk.version>
<caffeine.version>2.8.0</caffeine.version>
<jacoco.version>0.8.12</jacoco.version>
@ -131,7 +126,33 @@
<com.google.http.client.apis.version>1.42.3</com.google.http.client.apis.version>
<com.google.apis.compute.version>v1-rev20230606-2.0.0</com.google.apis.compute.version>
<com.google.cloud.storage.version>2.29.1</com.google.cloud.storage.version>
<jdk.strong.encapsulation.argLine><!-- empty placeholder --></jdk.strong.encapsulation.argLine>
<jdk.strong.encapsulation.argLine>
<!-- Strong encapsulation parameters -->
<!-- When updating this list, update all four locations: -->
<!-- 1) ForkingTaskRunner#STRONG_ENCAPSULATION_PROPERTIES -->
<!-- 2) docs/operations/java.md, "Strong encapsulation" section -->
<!-- 3) pom.xml, jdk.strong.encapsulation.argLine (here) -->
<!-- 4) examples/bin/run-java script -->
<!-- required for DataSketches Memory -->
--add-exports=java.base/jdk.internal.ref=ALL-UNNAMED
--add-exports=java.base/jdk.internal.misc=ALL-UNNAMED
--add-opens=java.base/java.nio=ALL-UNNAMED
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED
--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED
<!-- required for NativeIO#getfd -->
--add-opens=java.base/java.io=ALL-UNNAMED
<!-- required for Guice -->
--add-opens=java.base/java.lang=ALL-UNNAMED
<!-- required for metrics -->
--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED
<!-- required for certain EqualsVerifier tests (not required in production) -->
--add-opens=java.base/java.util=ALL-UNNAMED
</jdk.strong.encapsulation.argLine>
<jdk.security.manager.allow.argLine><!-- empty placeholder --></jdk.security.manager.allow.argLine>
<repoOrgId>maven.org</repoOrgId>
<repoOrgName>Maven Central Repository</repoOrgName>
@ -271,6 +292,16 @@
<enabled>false</enabled>
</snapshots>
</repository>
<!--
maven-dependency-plugin:3.1.2 seems to have updated HTTP repository access behavior.
We get the following error "Blocked mirror for repositories: [twitter (http://maven.twttr.com, default, releases+snapshots)]"
The suggested action step is to add the mirror: https://maven.apache.org/docs/3.8.1/release-notes.html#how-to-fix-when-i-get-a-http-repository-blocked
-->
<repository>
<id>twitter</id>
<url>https://maven.twttr.com</url>
</repository>
</repositories>
<pluginRepositories>
@ -1507,13 +1538,13 @@
<plugin>
<groupId>com.github.spotbugs</groupId>
<artifactId>spotbugs-maven-plugin</artifactId>
<version>4.2.0</version>
<version>4.8.6.6</version>
<dependencies>
<!-- overwrite dependency on spotbugs if you want to specify the version of spotbugs -->
<dependency>
<groupId>com.github.spotbugs</groupId>
<artifactId>spotbugs</artifactId>
<version>4.2.2</version>
<version>4.8.6</version>
</dependency>
</dependencies>
<configuration>
@ -1524,7 +1555,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-pmd-plugin</artifactId>
<version>3.16.0</version>
<version>3.26.0</version>
<configuration>
<linkXRef>false</linkXRef> <!-- prevent "Unable to locate Source XRef to link to" warning -->
<printFailingErrors>true</printFailingErrors>
@ -1534,6 +1565,7 @@
<excludeRoots>
<excludeRoot>target/generated-sources/</excludeRoot>
</excludeRoots>
<targetJdk>${maven.compiler.release}</targetJdk>
</configuration>
<executions>
<execution>
@ -1554,7 +1586,7 @@
<bundledSignatures>
<!--
This will automatically choose the right
signatures based on 'maven.compiler.target':
signatures based on 'maven.compiler.release':
-->
<bundledSignature>jdk-unsafe</bundledSignature>
</bundledSignatures>
@ -1622,6 +1654,16 @@
<ignore>sun.misc.Unsafe</ignore>
<!-- ignore java reflection polymorphic api signatures -->
<ignore>java.lang.invoke.MethodHandle</ignore>
<!--
For the following java.nio.* classes, we get errors like: "Undefined reference: java.nio.ByteBuffer java.nio.ByteBuffer.clear()"
GitHub issue: https://github.com/mojohaus/animal-sniffer/issues/4
-->
<ignore>java.nio.ByteBuffer</ignore>
<ignore>java.nio.IntBuffer</ignore>
<ignore>java.nio.CharBuffer</ignore>
<ignore>java.nio.FloatBuffer</ignore>
<ignore>java.nio.DoubleBuffer</ignore>
<ignore>java.nio.MappedByteBuffer</ignore>
</ignores>
</configuration>
</execution>
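The extra java.nio ignores above are needed because, starting with JDK 9, the buffer classes override methods such as clear() and flip() with covariant return types, so bytecode built on a newer JDK references ByteBuffer.clear() instead of java.nio.Buffer.clear() and trips the signature checker (the linked animal-sniffer issue tracks this). A minimal illustration of a call site that produces such a reference:

import java.nio.ByteBuffer;

public final class BufferClearSketch
{
  public static void main(String[] args)
  {
    final ByteBuffer buf = ByteBuffer.allocate(16);
    // Compiled on JDK 9+ this call records ByteBuffer.clear() (covariant override);
    // compiled on JDK 8 it records Buffer.clear(), hence the animal-sniffer ignore.
    buf.clear();
  }
}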
@ -1830,7 +1872,12 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.1.1</version>
<version>3.3.0</version>
<configuration>
<ignoredNonTestScopedDependencies>
<ignoredNonTestScopedDependency>*</ignoredNonTestScopedDependency>
</ignoredNonTestScopedDependencies>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
@ -1916,9 +1963,9 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<inherited>true</inherited>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
<release>${maven.compiler.release}</release>
</configuration>
</plugin>
<plugin>
@ -1931,18 +1978,6 @@
</build>
<profiles>
<!-- mockito 5.x dropped support for Java 8, but is necessary to test against Java >17 -->
<profile>
<id>pre-java-11</id>
<activation>
<jdk>(,11)</jdk>
</activation>
<properties>
<!-- mockito-inline was removed in mockito 5.3, but is necessary when running against mockito 4.x for Java 8 -->
<mockito.version>4.11.0</mockito.version>
<mockito.inline.artifact>inline</mockito.inline.artifact>
</properties>
</profile>
<profile>
<id>java-12+</id>
<activation>
@ -1956,54 +1991,6 @@
</jdk.security.manager.allow.argLine>
</properties>
</profile>
<profile>
<id>java-9+</id>
<activation>
<jdk>[9,)</jdk>
</activation>
<properties>
<jdk.strong.encapsulation.argLine>
<!-- Strong encapsulation parameters -->
<!-- When updating this list, update all four locations: -->
<!-- 1) ForkingTaskRunner#STRONG_ENCAPSULATION_PROPERTIES -->
<!-- 2) docs/operations/java.md, "Strong encapsulation" section -->
<!-- 3) pom.xml, jdk.strong.encapsulation.argLine (here) -->
<!-- 4) examples/bin/run-java script -->
<!-- required for DataSketches Memory -->
--add-exports=java.base/jdk.internal.ref=ALL-UNNAMED
--add-exports=java.base/jdk.internal.misc=ALL-UNNAMED
--add-opens=java.base/java.nio=ALL-UNNAMED
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED
--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED
<!-- required for NativeIO#getfd -->
--add-opens=java.base/java.io=ALL-UNNAMED
<!-- required for Guice -->
--add-opens=java.base/java.lang=ALL-UNNAMED
<!-- required for metrics -->
--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED
<!-- required for certain EqualsVerifier tests (not required in production) -->
--add-opens=java.base/java.util=ALL-UNNAMED
</jdk.strong.encapsulation.argLine>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<inherited>true</inherited>
<!-- prefer release instead of source/target in JDK 9 and above -->
<configuration>
<release>${java.version}</release>
</configuration>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>strict</id>
<activation>
@ -2034,9 +2021,10 @@
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.processing=ALL-UNNAMED</arg>
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED</arg>
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED</arg>
<arg>-J--add-exports=java.base/sun.nio.ch=ALL-UNNAMED</arg>
<arg>-J--add-opens=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED</arg>
<arg>-J--add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED</arg>
</compilerArgs>
</compilerArgs>
<annotationProcessorPaths>
<path>
<groupId>com.google.errorprone</groupId>

View File

@ -106,6 +106,7 @@
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-multibindings</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.google.code.findbugs</groupId>
@ -380,18 +381,12 @@
<artifactId>caliper</artifactId>
<scope>test</scope>
</dependency>
<!-- explicitly declare mockito-core dependency to make anaylize-dependencies happy when running with Java 8 -->
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>${mockito.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-${mockito.inline.artifact}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>nl.jqno.equalsverifier</groupId>
<artifactId>equalsverifier</artifactId>

View File

@ -59,7 +59,7 @@ NULL : 'null';
LONG : [0-9]+;
EXP: [eE] [-]? LONG;
// DOUBLE provides partial support for java double format
// see: https://docs.oracle.com/javase/8/docs/api/java/lang/Double.html#valueOf-java.lang.String-
// see: https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/lang/Double.html#valueOf(java.lang.String)
DOUBLE : 'NaN' | 'Infinity' | (LONG '.' LONG?) | (LONG EXP) | (LONG '.' LONG? EXP);
IDENTIFIER : [_$a-zA-Z][_$a-zA-Z0-9]* | '"' (ESC | ~ [\"\\])* '"';
WS : [ \t\r\n]+ -> skip ;
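Since the DOUBLE rule only partially mirrors the java.lang.Double text format referenced above, the quickest way to see which literal shapes it corresponds to is to feed the same strings to Double.valueOf (real JDK API; the wrapper class is illustrative only):

public final class DoubleLiteralSketch
{
  public static void main(String[] args)
  {
    System.out.println(Double.valueOf("NaN"));      // NaN literal
    System.out.println(Double.valueOf("Infinity")); // Infinity literal
    System.out.println(Double.valueOf("12.5"));     // LONG '.' LONG
    System.out.println(Double.valueOf("3e2"));      // LONG EXP -> 300.0
    System.out.println(Double.valueOf("1.5e-2"));   // LONG '.' LONG? EXP -> 0.015
  }
}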

View File

@ -49,4 +49,9 @@ public interface BlockingPool<T>
* @return count of pending requests
*/
long getPendingRequests();
/**
* @return number of used buffers from the pool
*/
long getUsedResourcesCount();
}
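A hedged usage sketch for the new gauge: a monitor could pair getUsedResourcesCount() with the existing getPendingRequests() to report pool pressure. Only the interface methods visible in this diff are assumed; the import path and the monitor class itself are illustrative.

// Hypothetical monitor, not part of this change; reports the two gauges the interface exposes.
import org.apache.druid.collections.BlockingPool;  // package assumed from Druid's collections module

public class MergeBufferPoolMonitorSketch
{
  private final BlockingPool<?> mergeBufferPool;

  public MergeBufferPoolMonitorSketch(BlockingPool<?> mergeBufferPool)
  {
    this.mergeBufferPool = mergeBufferPool;
  }

  public String report()
  {
    // pending requests: callers currently blocked waiting for buffers
    // used resources: buffers currently checked out of the pool
    return "pendingRequests=" + mergeBufferPool.getPendingRequests()
           + ", usedBuffers=" + mergeBufferPool.getUsedResourcesCount();
  }
}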

View File

@ -119,7 +119,7 @@ public class DefaultBlockingPool<T> implements BlockingPool<T>
throw new RuntimeException(e);
}
finally {
pendingRequests.incrementAndGet();
pendingRequests.decrementAndGet();
}
}
@ -129,6 +129,12 @@ public class DefaultBlockingPool<T> implements BlockingPool<T>
return pendingRequests.get();
}
@Override
public long getUsedResourcesCount()
{
return maxSize - objects.size();
}
private List<T> pollObjects(int elementNum) throws InterruptedException
{
final List<T> list = new ArrayList<>(elementNum);

View File

@ -61,4 +61,10 @@ public final class DummyBlockingPool<T> implements BlockingPool<T>
{
return 0;
}
@Override
public long getUsedResourcesCount()
{
return 0;
}
}

View File

@ -42,7 +42,7 @@ import java.security.spec.KeySpec;
* using javax.crypto package.
*
* To learn about possible algorithms supported and their names,
* See https://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html
* See https://docs.oracle.com/en/java/javase/11/docs/specs/security/standard-names.html
*/
public class CryptoService
{

View File

@ -32,6 +32,7 @@ import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.query.OrderBy;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.AggregateProjectionMetadata;
import org.apache.druid.segment.Cursors;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnHolder;
@ -39,6 +40,7 @@ import org.apache.druid.utils.CollectionUtils;
import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
@ -72,10 +74,10 @@ public class AggregateProjectionSpec
)
{
this.name = name;
if (CollectionUtils.isNullOrEmpty(groupingColumns)) {
throw InvalidInput.exception("groupingColumns must not be null or empty");
if (CollectionUtils.isNullOrEmpty(groupingColumns) && (aggregators == null || aggregators.length == 0)) {
throw InvalidInput.exception("groupingColumns and aggregators must not both be null or empty");
}
this.groupingColumns = groupingColumns;
this.groupingColumns = groupingColumns == null ? Collections.emptyList() : groupingColumns;
this.virtualColumns = virtualColumns == null ? VirtualColumns.EMPTY : virtualColumns;
// in the future this should be expanded to support user specified ordering, but for now we compute it based on
// the grouping columns, which is consistent with how rollup ordering works for incremental index base table
@ -169,6 +171,10 @@ public class AggregateProjectionSpec
private static ProjectionOrdering computeOrdering(VirtualColumns virtualColumns, List<DimensionSchema> groupingColumns)
{
if (groupingColumns.isEmpty()) {
// call it time ordered; there are no grouping columns, so there is only 1 row for this projection
return new ProjectionOrdering(Cursors.ascendingTimeOrder(), null);
}
final List<OrderBy> ordering = Lists.newArrayListWithCapacity(groupingColumns.size());
String timeColumnName = null;

View File

@ -808,7 +808,7 @@ public class ParallelMergeCombiningSequence<T> extends YieldingSequenceBase<T>
/**
* {@link ForkJoinPool} friendly {@link BlockingQueue} feeder, adapted from 'QueueTaker' of Java documentation on
* {@link ForkJoinPool.ManagedBlocker},
* https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/ForkJoinPool.ManagedBlocker.html
* https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/concurrent/ForkJoinPool.ManagedBlocker.html
*/
static class QueuePusher<E> implements ForkJoinPool.ManagedBlocker
{

View File

@ -661,4 +661,13 @@ public class QueryContext
"context=" + context +
'}';
}
public boolean isDecoupledMode()
{
String value = getString(
QueryContexts.CTX_NATIVE_QUERY_SQL_PLANNING_MODE,
QueryContexts.NATIVE_QUERY_SQL_PLANNING_MODE_COUPLED
);
return QueryContexts.NATIVE_QUERY_SQL_PLANNING_MODE_DECOUPLED.equals(value);
}
}
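A minimal sketch of the lookup isDecoupledMode() performs, written against a plain JDK Map rather than Druid's QueryContext so it stands alone; the key and the COUPLED/DECOUPLED values mirror the constants added to QueryContexts in the next hunk.

// Standalone illustration of the decoupled-mode check; not Druid's QueryContext class.
import java.util.HashMap;
import java.util.Map;

public class DecoupledModeCheckSketch
{
  static final String CTX_NATIVE_QUERY_SQL_PLANNING_MODE = "plannerStrategy";
  static final String COUPLED = "COUPLED";
  static final String DECOUPLED = "DECOUPLED";

  static boolean isDecoupledMode(Map<String, Object> context)
  {
    final Object value = context.getOrDefault(CTX_NATIVE_QUERY_SQL_PLANNING_MODE, COUPLED);
    return DECOUPLED.equals(value);
  }

  public static void main(String[] args)
  {
    Map<String, Object> ctx = new HashMap<>();
    System.out.println(isDecoupledMode(ctx));               // false, defaults to COUPLED
    ctx.put(CTX_NATIVE_QUERY_SQL_PLANNING_MODE, DECOUPLED);
    System.out.println(isDecoupledMode(ctx));               // true
  }
}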

View File

@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.Numbers;
import org.apache.druid.java.util.common.StringUtils;
import javax.annotation.Nullable;
import java.math.BigDecimal;
import java.util.Arrays;
import java.util.HashMap;
@ -105,6 +106,10 @@ public class QueryContexts
// SQL statement resource specific keys
public static final String CTX_EXECUTION_MODE = "executionMode";
public static final String CTX_NATIVE_QUERY_SQL_PLANNING_MODE = "plannerStrategy";
public static final String NATIVE_QUERY_SQL_PLANNING_MODE_COUPLED = "COUPLED";
public static final String NATIVE_QUERY_SQL_PLANNING_MODE_DECOUPLED = "DECOUPLED";
// Defaults
public static final boolean DEFAULT_BY_SEGMENT = false;
public static final boolean DEFAULT_POPULATE_CACHE = true;

View File

@ -198,4 +198,9 @@ public class UnionDataSource implements DataSource
"dataSources=" + dataSources +
'}';
}
public static boolean isCompatibleDataSource(DataSource dataSource)
{
return (dataSource instanceof TableDataSource || dataSource instanceof InlineDataSource);
}
}

View File

@ -19,18 +19,17 @@
package org.apache.druid.query.aggregation;
import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import org.apache.druid.error.DruidException;
import org.apache.druid.segment.serde.cell.IOIterator;
import org.apache.druid.segment.serde.cell.IntSerializer;
import org.apache.druid.segment.serde.cell.StagedSerde;
import org.apache.druid.segment.writeout.WriteOutBytes;
import javax.annotation.Nullable;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;
import java.util.NoSuchElementException;
/**
@ -45,109 +44,181 @@ public class SerializedStorage<T>
{
private final WriteOutBytes writeOutBytes;
private final StagedSerde<T> serde;
private final IntSerializer intSerializer = new IntSerializer();
private final ByteBuffer itemOffsetsBytes;
private final IntBuffer itemSizes;
private final LongArrayList rowChunkOffsets = new LongArrayList();
private int numStored = 0;
private int maxSize = 0;
public SerializedStorage(WriteOutBytes writeOutBytes, StagedSerde<T> serde)
{
this(writeOutBytes, serde, 4096);
}
public SerializedStorage(WriteOutBytes writeOutBytes, StagedSerde<T> serde, int chunkSize)
{
this.writeOutBytes = writeOutBytes;
this.serde = serde;
this.itemOffsetsBytes = ByteBuffer.allocate(chunkSize * Integer.BYTES).order(ByteOrder.nativeOrder());
this.itemSizes = itemOffsetsBytes.asIntBuffer();
}
public void store(@Nullable T value) throws IOException
{
byte[] bytes = serde.serialize(value);
writeOutBytes.write(intSerializer.serialize(bytes.length));
writeOutBytes.write(bytes);
maxSize = Math.max(maxSize, bytes.length);
itemSizes.put(bytes.length);
if (bytes.length > 0) {
writeOutBytes.write(bytes);
}
++numStored;
if (itemSizes.remaining() == 0) {
rowChunkOffsets.add(writeOutBytes.size());
writeOutBytes.write(itemOffsetsBytes);
itemOffsetsBytes.clear();
itemSizes.clear();
}
}
public int numStored()
{
return numStored;
}
/**
* Generates an iterator over everything that has been stored. Also signifies the end of storing objects.
* iterator() can be called multiple times if needed, but after iterator() is called, store() can no longer be
* called.
*
* @return an iterator
* @throws IOException on failure
*/
public IOIterator<T> iterator() throws IOException
{
return new DeserializingIOIterator<>(writeOutBytes.asInputStream(), serde);
if (itemSizes.position() != itemSizes.limit()) {
rowChunkOffsets.add(writeOutBytes.size());
itemOffsetsBytes.limit(itemSizes.position() * Integer.BYTES);
writeOutBytes.write(itemOffsetsBytes);
// Move the limit to the position so that we fail subsequent writes and indicate that we are done
itemSizes.limit(itemSizes.position());
}
return new DeserializingIOIterator<>(
writeOutBytes,
rowChunkOffsets,
numStored,
itemSizes.capacity(),
maxSize,
serde
);
}
private static class DeserializingIOIterator<T> implements IOIterator<T>
{
private static final int NEEDS_READ = -2;
private static final int EOF = -1;
private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0).asReadOnlyBuffer();
private final byte[] intBytes;
private final BufferedInputStream inputStream;
private final WriteOutBytes medium;
private final LongArrayList rowChunkOffsets;
private final int numEntries;
private ByteBuffer tmpBuf;
private final StagedSerde<T> serde;
private int nextSize;
private final ByteBuffer itemOffsetsBytes;
private final int[] itemSizes;
public DeserializingIOIterator(InputStream inputStream, StagedSerde<T> serde)
private long itemStartOffset;
private int chunkId = 0;
private int currId = 0;
private int itemIndex;
public DeserializingIOIterator(
WriteOutBytes medium,
LongArrayList rowChunkOffsets,
int numEntries,
int chunkSize,
int maxSize,
StagedSerde<T> serde
)
{
this.inputStream = new BufferedInputStream(inputStream);
this.medium = medium;
this.rowChunkOffsets = rowChunkOffsets;
this.numEntries = numEntries;
this.tmpBuf = ByteBuffer.allocate(maxSize).order(ByteOrder.nativeOrder());
this.serde = serde;
intBytes = new byte[Integer.BYTES];
nextSize = NEEDS_READ;
this.itemOffsetsBytes = ByteBuffer.allocate(chunkSize * Integer.BYTES).order(ByteOrder.nativeOrder());
this.itemSizes = new int[chunkSize];
this.itemIndex = chunkSize;
}
@Override
public boolean hasNext() throws IOException
public boolean hasNext()
{
return getNextSize() > EOF;
return currId < numEntries;
}
@Override
public T next() throws IOException
{
int currentNextSize = getNextSize();
if (currentNextSize == -1) {
throw new NoSuchElementException("end of buffer reached");
if (currId >= numEntries) {
throw new NoSuchElementException();
}
byte[] nextBytes = new byte[currentNextSize];
int bytesRead = 0;
while (bytesRead < currentNextSize) {
int result = inputStream.read(nextBytes, bytesRead, currentNextSize - bytesRead);
if (result == -1) {
throw new NoSuchElementException("unexpected end of buffer reached");
}
bytesRead += result;
}
Preconditions.checkState(bytesRead == currentNextSize);
T value = serde.deserialize(nextBytes);
nextSize = NEEDS_READ;
return value;
}
private int getNextSize() throws IOException
{
if (nextSize == NEEDS_READ) {
int bytesRead = 0;
while (bytesRead < Integer.BYTES) {
int result = inputStream.read(intBytes, bytesRead, Integer.BYTES - bytesRead);
if (result == -1) {
nextSize = EOF;
return EOF;
} else {
bytesRead += result;
if (itemIndex >= itemSizes.length) {
if (chunkId == 0) {
itemStartOffset = 0;
} else {
if (itemStartOffset != rowChunkOffsets.getLong(chunkId - 1)) {
throw DruidException.defensive(
"Should have read up to the start of the offsets [%,d], "
+ "but for some reason the values [%,d] don't align. Possible corruption?",
rowChunkOffsets.getLong(chunkId - 1),
itemStartOffset
);
}
itemStartOffset += (((long) itemSizes.length) * Integer.BYTES);
}
Preconditions.checkState(bytesRead == Integer.BYTES);
nextSize = ByteBuffer.wrap(intBytes).order(ByteOrder.nativeOrder()).getInt();
int numToRead = Math.min(itemSizes.length, numEntries - (chunkId * itemSizes.length));
final long readOffset = rowChunkOffsets.getLong(chunkId++);
itemOffsetsBytes.clear();
itemOffsetsBytes.limit(numToRead * Integer.BYTES);
medium.readFully(readOffset, itemOffsetsBytes);
itemOffsetsBytes.flip();
itemOffsetsBytes.asIntBuffer().get(itemSizes, 0, numToRead);
itemIndex = 0;
}
return nextSize;
int bytesToRead = itemSizes[itemIndex];
final T retVal;
if (bytesToRead == 0) {
retVal = serde.deserialize(EMPTY_BUFFER);
} else {
tmpBuf.clear();
tmpBuf.limit(bytesToRead);
medium.readFully(itemStartOffset, tmpBuf);
tmpBuf.flip();
retVal = serde.deserialize(tmpBuf);
}
itemStartOffset += bytesToRead;
++itemIndex;
++currId;
return retVal;
}
@Override
public void close() throws IOException
public void close()
{
inputStream.close();
}
}
}
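The rewrite replaces the old size-prefix-per-record stream with a chunked size index: payloads are appended to one stream while record sizes accumulate in a fixed-size chunk that is flushed after the payloads it describes, and each chunk's start offset is remembered so the iterator can locate the sizes later. A standalone JDK-only sketch of that write-side layout follows; it is illustrative and does not use Druid's WriteOutBytes API (the real iterator also handles the final partial chunk, omitted here for brevity).

// Plain-JDK sketch of the chunked size-index layout used by the rewritten SerializedStorage.
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.List;

public class ChunkedSizeIndexSketch
{
  private final ByteArrayOutputStream out = new ByteArrayOutputStream();
  private final ByteBuffer sizeChunk;
  private final List<Long> chunkOffsets = new ArrayList<>();

  public ChunkedSizeIndexSketch(int chunkSize)
  {
    this.sizeChunk = ByteBuffer.allocate(chunkSize * Integer.BYTES).order(ByteOrder.nativeOrder());
  }

  public void store(byte[] value) throws IOException
  {
    sizeChunk.putInt(value.length);      // buffer the record's size
    if (value.length > 0) {
      out.write(value);                  // append the payload itself
    }
    if (!sizeChunk.hasRemaining()) {
      chunkOffsets.add((long) out.size());  // remember where this chunk of sizes begins
      out.write(sizeChunk.array());         // flush the full chunk of sizes after its payloads
      sizeChunk.clear();
    }
  }
}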

View File

@ -100,6 +100,7 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
private final GroupByQueryConfig queryConfig;
private final GroupByQueryMetricsFactory queryMetricsFactory;
private final GroupByResourcesReservationPool groupByResourcesReservationPool;
private final GroupByStatsProvider groupByStatsProvider;
@VisibleForTesting
public GroupByQueryQueryToolChest(
@ -111,7 +112,24 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
groupingEngine,
GroupByQueryConfig::new,
DefaultGroupByQueryMetricsFactory.instance(),
groupByResourcesReservationPool
groupByResourcesReservationPool,
new GroupByStatsProvider()
);
}
@VisibleForTesting
public GroupByQueryQueryToolChest(
GroupingEngine groupingEngine,
GroupByResourcesReservationPool groupByResourcesReservationPool,
GroupByStatsProvider groupByStatsProvider
)
{
this(
groupingEngine,
GroupByQueryConfig::new,
DefaultGroupByQueryMetricsFactory.instance(),
groupByResourcesReservationPool,
groupByStatsProvider
);
}
@ -120,13 +138,15 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
GroupingEngine groupingEngine,
Supplier<GroupByQueryConfig> queryConfigSupplier,
GroupByQueryMetricsFactory queryMetricsFactory,
@Merging GroupByResourcesReservationPool groupByResourcesReservationPool
@Merging GroupByResourcesReservationPool groupByResourcesReservationPool,
GroupByStatsProvider groupByStatsProvider
)
{
this.groupingEngine = groupingEngine;
this.queryConfig = queryConfigSupplier.get();
this.queryMetricsFactory = queryMetricsFactory;
this.groupByResourcesReservationPool = groupByResourcesReservationPool;
this.groupByStatsProvider = groupByStatsProvider;
}
@Override
@ -170,7 +190,15 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
{
// Reserve the group by resources (merge buffers) required for executing the query
final QueryResourceId queryResourceId = query.context().getQueryResourceId();
groupByResourcesReservationPool.reserve(queryResourceId, query, willMergeRunner);
final GroupByStatsProvider.PerQueryStats perQueryStats =
groupByStatsProvider.getPerQueryStatsContainer(query.context().getQueryResourceId());
groupByResourcesReservationPool.reserve(
queryResourceId,
query,
willMergeRunner,
perQueryStats
);
final GroupByQueryResources resource = groupByResourcesReservationPool.fetch(queryResourceId);
if (resource == null) {
@ -180,16 +208,20 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
);
}
try {
Closer closer = Closer.create();
final Sequence<ResultRow> mergedSequence = mergeGroupByResults(
query,
resource,
runner,
context
context,
closer,
perQueryStats
);
Closer closer = Closer.create();
// Clean up the resources reserved during the execution of the query
closer.register(() -> groupByResourcesReservationPool.clean(queryResourceId));
closer.register(() -> groupByStatsProvider.closeQuery(query.context().getQueryResourceId()));
return Sequences.withBaggage(mergedSequence, closer);
}
catch (Exception e) {
@ -203,20 +235,24 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
final GroupByQuery query,
GroupByQueryResources resource,
QueryRunner<ResultRow> runner,
ResponseContext context
ResponseContext context,
Closer closer,
GroupByStatsProvider.PerQueryStats perQueryStats
)
{
if (isNestedQueryPushDown(query)) {
return mergeResultsWithNestedQueryPushDown(query, resource, runner, context);
return mergeResultsWithNestedQueryPushDown(query, resource, runner, context, perQueryStats);
}
return mergeGroupByResultsWithoutPushDown(query, resource, runner, context);
return mergeGroupByResultsWithoutPushDown(query, resource, runner, context, closer, perQueryStats);
}
private Sequence<ResultRow> mergeGroupByResultsWithoutPushDown(
GroupByQuery query,
GroupByQueryResources resource,
QueryRunner<ResultRow> runner,
ResponseContext context
ResponseContext context,
Closer closer,
GroupByStatsProvider.PerQueryStats perQueryStats
)
{
// If there's a subquery, merge subquery results and then apply the aggregator
@ -241,6 +277,8 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
}
subqueryContext.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, false);
subquery = (GroupByQuery) ((QueryDataSource) dataSource).getQuery().withOverriddenContext(subqueryContext);
closer.register(() -> groupByStatsProvider.closeQuery(subquery.context().getQueryResourceId()));
}
catch (ClassCastException e) {
throw new UnsupportedOperationException("Subqueries must be of type 'group by'");
@ -250,7 +288,9 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
subquery,
resource,
runner,
context
context,
closer,
perQueryStats
);
final Sequence<ResultRow> finalizingResults = finalizeSubqueryResults(subqueryResult, subquery);
@ -259,7 +299,14 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
return groupingEngine.processSubtotalsSpec(
query,
resource,
groupingEngine.processSubqueryResult(subquery, query, resource, finalizingResults, false)
groupingEngine.processSubqueryResult(
subquery,
query, resource,
finalizingResults,
false,
perQueryStats
),
perQueryStats
);
} else {
return groupingEngine.applyPostProcessing(
@ -268,7 +315,8 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
query,
resource,
finalizingResults,
false
false,
perQueryStats
),
query
);
@ -279,7 +327,8 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
return groupingEngine.processSubtotalsSpec(
query,
resource,
groupingEngine.mergeResults(runner, query.withSubtotalsSpec(null), context)
groupingEngine.mergeResults(runner, query.withSubtotalsSpec(null), context),
perQueryStats
);
} else {
return groupingEngine.applyPostProcessing(groupingEngine.mergeResults(runner, query, context), query);
@ -291,7 +340,8 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
GroupByQuery query,
GroupByQueryResources resource,
QueryRunner<ResultRow> runner,
ResponseContext context
ResponseContext context,
GroupByStatsProvider.PerQueryStats perQueryStats
)
{
Sequence<ResultRow> pushDownQueryResults = groupingEngine.mergeResults(runner, query, context);
@ -303,7 +353,8 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
rewrittenQuery,
resource,
finalizedResults,
true
true,
perQueryStats
),
query
);

View File

@ -86,17 +86,17 @@ public class GroupByResourcesReservationPool
/**
* Map of query's resource id -> group by resources reserved for the query to execute
*/
final ConcurrentHashMap<QueryResourceId, AtomicReference<GroupByQueryResources>> pool = new ConcurrentHashMap<>();
private final ConcurrentHashMap<QueryResourceId, AtomicReference<GroupByQueryResources>> pool = new ConcurrentHashMap<>();
/**
* Buffer pool from where the merge buffers are picked and reserved
*/
final BlockingPool<ByteBuffer> mergeBufferPool;
private final BlockingPool<ByteBuffer> mergeBufferPool;
/**
* Group by query config of the server
*/
final GroupByQueryConfig groupByQueryConfig;
private final GroupByQueryConfig groupByQueryConfig;
@Inject
public GroupByResourcesReservationPool(
@ -112,8 +112,14 @@ public class GroupByResourcesReservationPool
* Reserves appropriate resources, and maps it to the queryResourceId (usually the query's resource id) in the internal map.
* This is a blocking call, and can block up to the given query's timeout
*/
public void reserve(QueryResourceId queryResourceId, GroupByQuery groupByQuery, boolean willMergeRunner)
public void reserve(
QueryResourceId queryResourceId,
GroupByQuery groupByQuery,
boolean willMergeRunner,
GroupByStatsProvider.PerQueryStats perQueryStats
)
{
long startNs = System.nanoTime();
if (queryResourceId == null) {
throw DruidException.defensive("Query resource id must be populated");
}
@ -145,6 +151,8 @@ public class GroupByResourcesReservationPool
// Resources have been allocated, spot has been reserved. The reference would ALWAYS refer to 'null'. Refer the
// allocated resources from it
reference.compareAndSet(null, resources);
perQueryStats.mergeBufferAcquisitionTime(System.nanoTime() - startNs);
}
/**

View File

@ -0,0 +1,190 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.groupby;
import org.apache.druid.guice.LazySingleton;
import org.apache.druid.query.QueryResourceId;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Collects groupBy query metrics such as spilled bytes, merge buffer acquisition time, and dictionary size.
*/
@LazySingleton
public class GroupByStatsProvider
{
private final Map<QueryResourceId, PerQueryStats> perQueryStats;
private final AggregateStats aggregateStatsContainer;
public GroupByStatsProvider()
{
this.perQueryStats = new ConcurrentHashMap<>();
this.aggregateStatsContainer = new AggregateStats();
}
public PerQueryStats getPerQueryStatsContainer(QueryResourceId resourceId)
{
if (resourceId == null) {
return null;
}
return perQueryStats.computeIfAbsent(resourceId, value -> new PerQueryStats());
}
public synchronized void closeQuery(QueryResourceId resourceId)
{
if (resourceId == null || !perQueryStats.containsKey(resourceId)) {
return;
}
PerQueryStats container = perQueryStats.remove(resourceId);
aggregateStatsContainer.addQueryStats(container);
}
public synchronized AggregateStats getStatsSince()
{
return aggregateStatsContainer.reset();
}
public static class AggregateStats
{
private long mergeBufferQueries = 0;
private long mergeBufferAcquisitionTimeNs = 0;
private long spilledQueries = 0;
private long spilledBytes = 0;
private long mergeDictionarySize = 0;
public AggregateStats()
{
}
public AggregateStats(
long mergeBufferQueries,
long mergeBufferAcquisitionTimeNs,
long spilledQueries,
long spilledBytes,
long mergeDictionarySize
)
{
this.mergeBufferQueries = mergeBufferQueries;
this.mergeBufferAcquisitionTimeNs = mergeBufferAcquisitionTimeNs;
this.spilledQueries = spilledQueries;
this.spilledBytes = spilledBytes;
this.mergeDictionarySize = mergeDictionarySize;
}
public long getMergeBufferQueries()
{
return mergeBufferQueries;
}
public long getMergeBufferAcquisitionTimeNs()
{
return mergeBufferAcquisitionTimeNs;
}
public long getSpilledQueries()
{
return spilledQueries;
}
public long getSpilledBytes()
{
return spilledBytes;
}
public long getMergeDictionarySize()
{
return mergeDictionarySize;
}
public void addQueryStats(PerQueryStats perQueryStats)
{
if (perQueryStats.getMergeBufferAcquisitionTimeNs() > 0) {
mergeBufferQueries++;
mergeBufferAcquisitionTimeNs += perQueryStats.getMergeBufferAcquisitionTimeNs();
}
if (perQueryStats.getSpilledBytes() > 0) {
spilledQueries++;
spilledBytes += perQueryStats.getSpilledBytes();
}
mergeDictionarySize += perQueryStats.getMergeDictionarySize();
}
public AggregateStats reset()
{
AggregateStats aggregateStats =
new AggregateStats(
mergeBufferQueries,
mergeBufferAcquisitionTimeNs,
spilledQueries,
spilledBytes,
mergeDictionarySize
);
this.mergeBufferQueries = 0;
this.mergeBufferAcquisitionTimeNs = 0;
this.spilledQueries = 0;
this.spilledBytes = 0;
this.mergeDictionarySize = 0;
return aggregateStats;
}
}
public static class PerQueryStats
{
private final AtomicLong mergeBufferAcquisitionTimeNs = new AtomicLong(0);
private final AtomicLong spilledBytes = new AtomicLong(0);
private final AtomicLong mergeDictionarySize = new AtomicLong(0);
public void mergeBufferAcquisitionTime(long delay)
{
mergeBufferAcquisitionTimeNs.addAndGet(delay);
}
public void spilledBytes(long bytes)
{
spilledBytes.addAndGet(bytes);
}
public void dictionarySize(long size)
{
mergeDictionarySize.addAndGet(size);
}
public long getMergeBufferAcquisitionTimeNs()
{
return mergeBufferAcquisitionTimeNs.get();
}
public long getSpilledBytes()
{
return spilledBytes.get();
}
public long getMergeDictionarySize()
{
return mergeDictionarySize.get();
}
}
}
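A lifecycle sketch based only on the methods shown above: components record into a per-query container during execution, closeQuery() folds the container into the aggregate, and a monitor periodically drains the aggregate via getStatsSince(). The QueryResourceId string constructor is an assumption made for illustration.

// Illustrative lifecycle of the new stats provider; constructor of QueryResourceId is assumed.
import org.apache.druid.query.QueryResourceId;
import org.apache.druid.query.groupby.GroupByStatsProvider;

public class GroupByStatsLifecycleSketch
{
  public static void main(String[] args)
  {
    final GroupByStatsProvider provider = new GroupByStatsProvider();
    final QueryResourceId resourceId = new QueryResourceId("example-query-id");

    // During query execution, components record into the per-query container.
    GroupByStatsProvider.PerQueryStats stats = provider.getPerQueryStatsContainer(resourceId);
    stats.mergeBufferAcquisitionTime(1_000_000L);  // 1 ms spent waiting for merge buffers
    stats.spilledBytes(4096L);                     // bytes spilled to temporary storage
    stats.dictionarySize(128L);                    // estimated merge dictionary size

    // When the query finishes, its stats are folded into the aggregate...
    provider.closeQuery(resourceId);

    // ...and a monitor drains the aggregate on its emission schedule.
    GroupByStatsProvider.AggregateStats aggregate = provider.getStatsSince();
    System.out.println(aggregate.getSpilledQueries() + " queries spilled "
                       + aggregate.getSpilledBytes() + " bytes");
  }
}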

View File

@ -121,6 +121,7 @@ public class GroupingEngine
private final ObjectMapper jsonMapper;
private final ObjectMapper spillMapper;
private final QueryWatcher queryWatcher;
private final GroupByStatsProvider groupByStatsProvider;
@Inject
public GroupingEngine(
@ -129,7 +130,8 @@ public class GroupingEngine
@Merging GroupByResourcesReservationPool groupByResourcesReservationPool,
@Json ObjectMapper jsonMapper,
@Smile ObjectMapper spillMapper,
QueryWatcher queryWatcher
QueryWatcher queryWatcher,
GroupByStatsProvider groupByStatsProvider
)
{
this.processingConfig = processingConfig;
@ -138,6 +140,7 @@ public class GroupingEngine
this.jsonMapper = jsonMapper;
this.spillMapper = spillMapper;
this.queryWatcher = queryWatcher;
this.groupByStatsProvider = groupByStatsProvider;
}
/**
@ -452,7 +455,8 @@ public class GroupingEngine
processingConfig.getNumThreads(),
processingConfig.intermediateComputeSizeBytes(),
spillMapper,
processingConfig.getTmpDir()
processingConfig.getTmpDir(),
groupByStatsProvider
);
}
@ -587,7 +591,8 @@ public class GroupingEngine
GroupByQuery query,
GroupByQueryResources resource,
Sequence<ResultRow> subqueryResult,
boolean wasQueryPushedDown
boolean wasQueryPushedDown,
GroupByStatsProvider.PerQueryStats perQueryStats
)
{
// Keep a reference to resultSupplier outside the "try" so we can close it if something goes wrong
@ -614,7 +619,8 @@ public class GroupingEngine
resource,
spillMapper,
processingConfig.getTmpDir(),
processingConfig.intermediateComputeSizeBytes()
processingConfig.intermediateComputeSizeBytes(),
perQueryStats
);
final GroupByRowProcessor.ResultSupplier finalResultSupplier = resultSupplier;
@ -644,7 +650,8 @@ public class GroupingEngine
public Sequence<ResultRow> processSubtotalsSpec(
GroupByQuery query,
GroupByQueryResources resource,
Sequence<ResultRow> queryResult
Sequence<ResultRow> queryResult,
GroupByStatsProvider.PerQueryStats perQueryStats
)
{
// How it works?
@ -695,7 +702,8 @@ public class GroupingEngine
resource,
spillMapper,
processingConfig.getTmpDir(),
processingConfig.intermediateComputeSizeBytes()
processingConfig.intermediateComputeSizeBytes(),
perQueryStats
);
List<String> queryDimNamesInOrder = baseSubtotalQuery.getDimensionNamesInOrder();
@ -757,7 +765,8 @@ public class GroupingEngine
resource,
spillMapper,
processingConfig.getTmpDir(),
processingConfig.intermediateComputeSizeBytes()
processingConfig.intermediateComputeSizeBytes(),
perQueryStats
);
subtotalsResults.add(

View File

@ -38,6 +38,7 @@ import org.apache.druid.query.QueryInterruptedException;
import org.apache.druid.query.QueryTimeoutException;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
import org.apache.druid.segment.ColumnSelectorFactory;
@ -94,6 +95,7 @@ public class ConcurrentGrouper<KeyType> implements Grouper<KeyType>
@Nullable
private final ParallelCombiner<KeyType> parallelCombiner;
private final boolean mergeThreadLocal;
private final GroupByStatsProvider.PerQueryStats perQueryStats;
private volatile boolean initialized = false;
@ -113,7 +115,8 @@ public class ConcurrentGrouper<KeyType> implements Grouper<KeyType>
final ListeningExecutorService executor,
final int priority,
final boolean hasQueryTimeout,
final long queryTimeoutAt
final long queryTimeoutAt,
final GroupByStatsProvider.PerQueryStats perQueryStats
)
{
this(
@ -137,7 +140,8 @@ public class ConcurrentGrouper<KeyType> implements Grouper<KeyType>
queryTimeoutAt,
groupByQueryConfig.getIntermediateCombineDegree(),
groupByQueryConfig.getNumParallelCombineThreads(),
groupByQueryConfig.isMergeThreadLocal()
groupByQueryConfig.isMergeThreadLocal(),
perQueryStats
);
}
@ -162,7 +166,8 @@ public class ConcurrentGrouper<KeyType> implements Grouper<KeyType>
final long queryTimeoutAt,
final int intermediateCombineDegree,
final int numParallelCombineThreads,
final boolean mergeThreadLocal
final boolean mergeThreadLocal,
final GroupByStatsProvider.PerQueryStats perQueryStats
)
{
Preconditions.checkArgument(concurrencyHint > 0, "concurrencyHint > 0");
@ -212,6 +217,7 @@ public class ConcurrentGrouper<KeyType> implements Grouper<KeyType>
}
this.mergeThreadLocal = mergeThreadLocal;
this.perQueryStats = perQueryStats;
}
@Override
@ -238,7 +244,8 @@ public class ConcurrentGrouper<KeyType> implements Grouper<KeyType>
false,
limitSpec,
sortHasNonGroupingFields,
sliceSize
sliceSize,
perQueryStats
);
grouper.init();
groupers.add(grouper);

View File

@ -59,6 +59,7 @@ import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryResources;
import org.apache.druid.query.groupby.GroupByResourcesReservationPool;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey;
@ -103,6 +104,7 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
private final ObjectMapper spillMapper;
private final String processingTmpDir;
private final int mergeBufferSize;
private final GroupByStatsProvider groupByStatsProvider;
public GroupByMergingQueryRunner(
GroupByQueryConfig config,
@ -114,7 +116,8 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
int concurrencyHint,
int mergeBufferSize,
ObjectMapper spillMapper,
String processingTmpDir
String processingTmpDir,
GroupByStatsProvider groupByStatsProvider
)
{
this.config = config;
@ -127,6 +130,7 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
this.spillMapper = spillMapper;
this.processingTmpDir = processingTmpDir;
this.mergeBufferSize = mergeBufferSize;
this.groupByStatsProvider = groupByStatsProvider;
}
@Override
@ -163,6 +167,9 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
StringUtils.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId())
);
GroupByStatsProvider.PerQueryStats perQueryStats =
groupByStatsProvider.getPerQueryStatsContainer(query.context().getQueryResourceId());
final int priority = queryContext.getPriority();
// Figure out timeoutAt time now, so we can apply the timeout to both the mergeBufferPool.take and the actual
@ -182,8 +189,10 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
try {
final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(
temporaryStorageDirectory,
querySpecificConfig.getMaxOnDiskStorage().getBytes()
querySpecificConfig.getMaxOnDiskStorage().getBytes(),
perQueryStats
);
final ReferenceCountingResourceHolder<LimitedTemporaryStorage> temporaryStorageHolder =
ReferenceCountingResourceHolder.fromCloseable(temporaryStorage);
resources.register(temporaryStorageHolder);
@ -215,7 +224,8 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
priority,
hasTimeout,
timeoutAt,
mergeBufferSize
mergeBufferSize,
perQueryStats
);
final Grouper<RowBasedKey> grouper = pair.lhs;
final Accumulator<AggregateResult, ResultRow> accumulator = pair.rhs;
@ -318,8 +328,8 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
GroupByQueryResources resource = groupByResourcesReservationPool.fetch(queryResourceId);
if (resource == null) {
throw DruidException.defensive(
"Expected merge buffers to be reserved in the reservation pool for the query id [%s] however while executing "
+ "the GroupByMergingQueryRunner, however none were provided.",
"Expected merge buffers to be reserved in the reservation pool for the query resource id [%s] however while executing "
+ "the GroupByMergingQueryRunner none were provided.",
queryResourceId
);
}

View File

@ -879,6 +879,12 @@ public class GroupByQueryEngine
return ImmutableList.of();
}
@Override
public Long getDictionarySize()
{
return 0L;
}
@Override
public ByteBuffer createKey()
{

View File

@ -34,6 +34,7 @@ import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryResources;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey;
@ -93,7 +94,8 @@ public class GroupByRowProcessor
final GroupByQueryResources resource,
final ObjectMapper spillMapper,
final String processingTmpDir,
final int mergeBufferSize
final int mergeBufferSize,
final GroupByStatsProvider.PerQueryStats perQueryStats
)
{
final Closer closeOnExit = Closer.create();
@ -106,7 +108,8 @@ public class GroupByRowProcessor
final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(
temporaryStorageDirectory,
querySpecificConfig.getMaxOnDiskStorage().getBytes()
querySpecificConfig.getMaxOnDiskStorage().getBytes(),
perQueryStats
);
closeOnExit.register(temporaryStorage);
@ -128,7 +131,8 @@ public class GroupByRowProcessor
},
temporaryStorage,
spillMapper,
mergeBufferSize
mergeBufferSize,
perQueryStats
);
final Grouper<RowBasedKey> grouper = pair.lhs;
final Accumulator<AggregateResult, ResultRow> accumulator = pair.rhs;

View File

@ -184,6 +184,11 @@ public interface Grouper<KeyType> extends Closeable
*/
List<String> getDictionary();
/**
* Return the estimated size of the dictionary of this KeySerde.
*/
Long getDictionarySize();
/**
* Serialize a key. This will be called by the {@link #aggregate(Object)} method. The buffer will not
* be retained after the aggregate method returns, so reusing buffers is OK.

View File

@ -25,6 +25,7 @@ import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import java.io.Closeable;
import java.io.File;
@ -47,6 +48,8 @@ public class LimitedTemporaryStorage implements Closeable
{
private static final Logger log = new Logger(LimitedTemporaryStorage.class);
private final GroupByStatsProvider.PerQueryStats perQueryStatsContainer;
private final File storageDirectory;
private final long maxBytesUsed;
@ -57,10 +60,15 @@ public class LimitedTemporaryStorage implements Closeable
private boolean createdStorageDirectory = false;
public LimitedTemporaryStorage(File storageDirectory, long maxBytesUsed)
public LimitedTemporaryStorage(
File storageDirectory,
long maxBytesUsed,
GroupByStatsProvider.PerQueryStats perQueryStatsContainer
)
{
this.storageDirectory = storageDirectory;
this.maxBytesUsed = maxBytesUsed;
this.perQueryStatsContainer = perQueryStatsContainer;
}
/**
@ -121,7 +129,7 @@ public class LimitedTemporaryStorage implements Closeable
}
@VisibleForTesting
long currentSize()
public long currentSize()
{
return bytesUsed.get();
}
@ -134,6 +142,11 @@ public class LimitedTemporaryStorage implements Closeable
return;
}
closed = true;
perQueryStatsContainer.spilledBytes(bytesUsed.get());
bytesUsed.set(0);
for (File file : ImmutableSet.copyOf(files)) {
delete(file);
}
@ -199,6 +212,5 @@ public class LimitedTemporaryStorage implements Closeable
throw new TemporaryStorageFullException(maxBytesUsed);
}
}
}
}

View File

@ -58,6 +58,7 @@ import org.apache.druid.query.filter.Filter;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.groupby.epinephelinae.Grouper.BufferComparator;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
@ -131,7 +132,8 @@ public class RowBasedGrouperHelper
final Supplier<ByteBuffer> bufferSupplier,
final LimitedTemporaryStorage temporaryStorage,
final ObjectMapper spillMapper,
final int mergeBufferSize
final int mergeBufferSize,
final GroupByStatsProvider.PerQueryStats perQueryStats
)
{
return createGrouperAccumulatorPair(
@ -148,7 +150,8 @@ public class RowBasedGrouperHelper
UNKNOWN_THREAD_PRIORITY,
false,
UNKNOWN_TIMEOUT,
mergeBufferSize
mergeBufferSize,
perQueryStats
);
}
@ -197,7 +200,8 @@ public class RowBasedGrouperHelper
final int priority,
final boolean hasQueryTimeout,
final long queryTimeoutAt,
final int mergeBufferSize
final int mergeBufferSize,
final GroupByStatsProvider.PerQueryStats perQueryStats
)
{
// concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
@ -276,7 +280,8 @@ public class RowBasedGrouperHelper
true,
limitSpec,
sortHasNonGroupingFields,
mergeBufferSize
mergeBufferSize,
perQueryStats
);
} else {
final Grouper.KeySerdeFactory<RowBasedKey> combineKeySerdeFactory = new RowBasedKeySerdeFactory(
@ -305,7 +310,8 @@ public class RowBasedGrouperHelper
grouperSorter,
priority,
hasQueryTimeout,
queryTimeoutAt
queryTimeoutAt,
perQueryStats
);
}
@ -1282,6 +1288,12 @@ public class RowBasedGrouperHelper
return dictionary;
}
@Override
public Long getDictionarySize()
{
return currentEstimatedSize;
}
@Override
public ByteBuffer toByteBuffer(RowBasedKey key)
{

View File

@ -37,6 +37,7 @@ import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.aggregation.AggregatorAdapters;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
import org.apache.druid.segment.ColumnSelectorFactory;
@ -74,6 +75,7 @@ public class SpillingGrouper<KeyType> implements Grouper<KeyType>
private final AggregatorFactory[] aggregatorFactories;
private final Comparator<Grouper.Entry<KeyType>> keyObjComparator;
private final Comparator<Grouper.Entry<KeyType>> defaultOrderKeyObjComparator;
private final GroupByStatsProvider.PerQueryStats perQueryStats;
private final List<File> files = new ArrayList<>();
private final List<File> dictionaryFiles = new ArrayList<>();
@ -95,7 +97,8 @@ public class SpillingGrouper<KeyType> implements Grouper<KeyType>
final boolean spillingAllowed,
final DefaultLimitSpec limitSpec,
final boolean sortHasNonGroupingFields,
final int mergeBufferSize
final int mergeBufferSize,
final GroupByStatsProvider.PerQueryStats perQueryStats
)
{
this.keySerde = keySerdeFactory.factorize();
@ -155,6 +158,7 @@ public class SpillingGrouper<KeyType> implements Grouper<KeyType>
this.spillMapper = keySerde.decorateObjectMapper(spillMapper);
this.spillingAllowed = spillingAllowed;
this.sortHasNonGroupingFields = sortHasNonGroupingFields;
this.perQueryStats = perQueryStats;
}
@Override
@ -214,6 +218,7 @@ public class SpillingGrouper<KeyType> implements Grouper<KeyType>
@Override
public void close()
{
perQueryStats.dictionarySize(keySerde.getDictionarySize());
grouper.close();
keySerde.reset();
deleteFiles();

View File

@ -112,12 +112,8 @@ public class CursorFactoryRowsAndColumns implements CloseableShapeshifter, RowsA
cursor.advance();
}
if (writer == null) {
return new EmptyRowsAndColumns();
} else {
final byte[] bytes = writer.toByteArray();
return new ColumnBasedFrameRowsAndColumns(Frame.wrap(bytes), rowSignature);
}
final byte[] bytes = writer.toByteArray();
return new ColumnBasedFrameRowsAndColumns(Frame.wrap(bytes), rowSignature);
}
}
}

View File

@ -185,14 +185,13 @@ public class TimeseriesQuery extends BaseQuery<Result<TimeseriesResultValue>>
@Override
public RowSignature getResultRowSignature(Finalization finalization)
{
final Finalization finalization1 = finalization;
final RowSignature.Builder builder = RowSignature.builder();
builder.addTimeColumn();
String timestampResultField = getTimestampResultField();
if (StringUtils.isNotEmpty(timestampResultField)) {
builder.add(timestampResultField, ColumnType.LONG);
}
builder.addAggregators(aggregatorSpecs, finalization1);
builder.addAggregators(aggregatorSpecs, finalization);
builder.addPostAggregators(postAggregatorSpecs);
return builder.build();
}

View File

@ -96,6 +96,7 @@ public class UnionQueryLogic implements QueryLogic
Sequence run = runner.run(queryPlus.withQuery(q), responseContext);
seqs.add(run);
}
return Sequences.concat(seqs);
}
}

View File

@ -29,7 +29,7 @@ import com.google.common.collect.Interner;
import com.google.common.collect.Interners;
import com.google.common.collect.Lists;
import org.apache.druid.data.input.impl.AggregateProjectionSpec;
import org.apache.druid.error.InvalidInput;
import org.apache.druid.error.DruidException;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.query.OrderBy;
@ -40,6 +40,7 @@ import org.apache.druid.utils.CollectionUtils;
import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
@ -167,17 +168,17 @@ public class AggregateProjectionMetadata
@JsonProperty("name") String name,
@JsonProperty("timeColumnName") @Nullable String timeColumnName,
@JsonProperty("virtualColumns") @Nullable VirtualColumns virtualColumns,
@JsonProperty("groupingColumns") List<String> groupingColumns,
@JsonProperty("groupingColumns") @Nullable List<String> groupingColumns,
@JsonProperty("aggregators") @Nullable AggregatorFactory[] aggregators,
@JsonProperty("ordering") List<OrderBy> ordering
)
{
this.name = name;
if (CollectionUtils.isNullOrEmpty(groupingColumns)) {
throw InvalidInput.exception("groupingColumns must not be null or empty");
if (CollectionUtils.isNullOrEmpty(groupingColumns) && (aggregators == null || aggregators.length == 0)) {
throw DruidException.defensive("groupingColumns and aggregators must not both be null or empty");
}
this.virtualColumns = virtualColumns == null ? VirtualColumns.EMPTY : virtualColumns;
this.groupingColumns = groupingColumns;
this.groupingColumns = groupingColumns == null ? Collections.emptyList() : groupingColumns;
this.aggregators = aggregators == null ? new AggregatorFactory[0] : aggregators;
this.ordering = ordering;

View File

@ -456,7 +456,8 @@ public class IndexIO
new StringUtf8DictionaryEncodedColumnSupplier<>(
index.getDimValueUtf8Lookup(dimension)::singleThreaded,
null,
Suppliers.ofInstance(index.getDimColumn(dimension))
Suppliers.ofInstance(index.getDimColumn(dimension)),
LEGACY_FACTORY.getBitmapFactory()
)
);
GenericIndexed<ImmutableBitmap> bitmaps = index.getBitmapIndexes().get(dimension);

View File

@ -43,7 +43,7 @@ import java.util.Objects;
*/
public class IndexSpec
{
public static IndexSpec DEFAULT = IndexSpec.builder().build();
public static final IndexSpec DEFAULT = IndexSpec.builder().build();
public static Builder builder()
{

View File

@ -20,6 +20,7 @@
package org.apache.druid.segment.column;
import com.google.common.collect.Lists;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.common.semantic.SemanticUtils;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.extraction.ExtractionFn;
@ -73,16 +74,19 @@ public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColum
@Nullable
private final ColumnarMultiInts multiValueColumn;
private final Indexed<ByteBuffer> utf8Dictionary;
private final BitmapFactory bitmapFactory;
public StringUtf8DictionaryEncodedColumn(
@Nullable ColumnarInts singleValueColumn,
@Nullable ColumnarMultiInts multiValueColumn,
Indexed<ByteBuffer> utf8Dictionary
Indexed<ByteBuffer> utf8Dictionary,
BitmapFactory bitmapFactory
)
{
this.column = singleValueColumn;
this.multiValueColumn = multiValueColumn;
this.utf8Dictionary = utf8Dictionary;
this.bitmapFactory = bitmapFactory;
}
@Override
@ -135,6 +139,11 @@ public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColum
return utf8Dictionary.size();
}
public BitmapFactory getBitmapFactory()
{
return bitmapFactory;
}
@Override
public HistoricalDimensionSelector makeDimensionSelector(
final ReadableOffset offset,

View File

@ -36,6 +36,7 @@ public class BlockLayoutColumnarDoublesSupplier implements Supplier<ColumnarDoub
// The number of doubles per buffer.
private final int sizePer;
private final CompressionStrategy strategy;
public BlockLayoutColumnarDoublesSupplier(
int totalSize,
@ -45,7 +46,8 @@ public class BlockLayoutColumnarDoublesSupplier implements Supplier<ColumnarDoub
CompressionStrategy strategy
)
{
baseDoubleBuffers = GenericIndexed.read(fromBuffer, DecompressingByteBufferObjectStrategy.of(byteOrder, strategy));
this.strategy = strategy;
this.baseDoubleBuffers = GenericIndexed.read(fromBuffer, DecompressingByteBufferObjectStrategy.of(byteOrder, strategy));
this.totalSize = totalSize;
this.sizePer = sizePer;
}
@ -78,7 +80,8 @@ public class BlockLayoutColumnarDoublesSupplier implements Supplier<ColumnarDoub
}
}
private class BlockLayoutColumnarDoubles implements ColumnarDoubles
// This needs to be a public class so that SemanticCreator is able to call it.
public class BlockLayoutColumnarDoubles implements ColumnarDoubles
{
final Indexed<ResourceHolder<ByteBuffer>> singleThreadedDoubleBuffers = baseDoubleBuffers.singleThreaded();
@ -91,6 +94,11 @@ public class BlockLayoutColumnarDoublesSupplier implements Supplier<ColumnarDoub
@Nullable
DoubleBuffer doubleBuffer;
public CompressionStrategy getCompressionStrategy()
{
return strategy;
}
@Override
public int size()
{

View File

@ -43,6 +43,7 @@ public class BlockLayoutColumnarLongsSupplier implements Supplier<ColumnarLongs>
// The number of longs per buffer.
private final int sizePer;
private final CompressionFactory.LongEncodingReader baseReader;
private final CompressionStrategy strategy;
public BlockLayoutColumnarLongsSupplier(
int totalSize,
@ -53,6 +54,7 @@ public class BlockLayoutColumnarLongsSupplier implements Supplier<ColumnarLongs>
CompressionStrategy strategy
)
{
this.strategy = strategy;
this.baseLongBuffers = GenericIndexed.read(fromBuffer, DecompressingByteBufferObjectStrategy.of(order, strategy));
this.totalSize = totalSize;
this.sizePer = sizePer;
@ -124,7 +126,8 @@ public class BlockLayoutColumnarLongsSupplier implements Supplier<ColumnarLongs>
}
}
private class BlockLayoutColumnarLongs implements ColumnarLongs
// This needs to be a public class so that SemanticCreator is able to call it.
public class BlockLayoutColumnarLongs implements ColumnarLongs
{
final CompressionFactory.LongEncodingReader reader = baseReader.duplicate();
final Indexed<ResourceHolder<ByteBuffer>> singleThreadedLongBuffers = baseLongBuffers.singleThreaded();
@ -140,6 +143,16 @@ public class BlockLayoutColumnarLongsSupplier implements Supplier<ColumnarLongs>
@Nullable
LongBuffer longBuffer;
public CompressionFactory.LongEncodingStrategy getEncodingStrategy()
{
return baseReader.getStrategy();
}
public CompressionStrategy getCompressionStrategy()
{
return strategy;
}
@Override
public int size()
{

View File

@ -308,7 +308,7 @@ public class CompressedVSizeColumnarIntsSupplier implements WritableSupplier<Col
}
}
private class CompressedVSizeColumnarInts implements ColumnarInts
public class CompressedVSizeColumnarInts implements ColumnarInts
{
final Indexed<ResourceHolder<ByteBuffer>> singleThreadedBuffers = baseBuffers.singleThreaded();
@ -329,6 +329,11 @@ public class CompressedVSizeColumnarIntsSupplier implements WritableSupplier<Col
return totalSize;
}
public CompressionStrategy getCompressionStrategy()
{
return compression;
}
/**
* Returns the value at the given index into the column.
* <p/>

Some files were not shown because too many files have changed in this diff.