mirror of https://github.com/apache/druid.git
Merge branch 'rename-d1-dbl1' into unnest-relfieldtrimmer-unnestfieldtype
commit e67b609eb6
@@ -40,7 +40,7 @@ jobs:
- uses: actions/setup-java@v4
with:
distribution: 'zulu'
java-version: '8'
java-version: '17'
cache: 'maven'

# Initializes the CodeQL tools for scanning.

@@ -28,7 +28,7 @@ on:
jobs:
build:
if: (github.event_name == 'schedule' && github.repository == 'apache/druid')
name: build (jdk8)
name: build (jdk17)
runs-on: ubuntu-latest
steps:
- name: Checkout branch
@@ -37,7 +37,7 @@ jobs:
- name: setup java
uses: actions/setup-java@v4
with:
java-version: '8'
java-version: '17'
distribution: 'zulu'

- name: Cache Maven m2 repository
@@ -60,8 +60,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
needs: build
with:
build_jdk: 8
runtime_jdk: 11
build_jdk: 17
runtime_jdk: 21.0.4
testing_groups: -Dgroups=${{ matrix.testing_group }}
use_indexer: middleManager
group: ${{ matrix.testing_group }}
@@ -74,8 +74,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
needs: build
with:
build_jdk: 8
runtime_jdk: 11
build_jdk: 17
runtime_jdk: 21.0.4
testing_groups: -Dgroups=${{ matrix.testing_group }}
use_indexer: indexer
group: ${{ matrix.testing_group }}
@@ -88,8 +88,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
needs: build
with:
build_jdk: 8
runtime_jdk: 11
build_jdk: 17
runtime_jdk: 21.0.4
testing_groups: -Dgroups=${{ matrix.testing_group }}
use_indexer: middleManager
override_config_path: ./environment-configs/test-groups/prepopulated-data
@@ -103,8 +103,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
needs: build
with:
build_jdk: 8
runtime_jdk: 11
build_jdk: 17
runtime_jdk: 21.0.4
testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties
use_indexer: ${{ matrix.indexer }}
group: other
@@ -122,7 +122,7 @@ jobs:
- name: setup java
uses: actions/setup-java@v4
with:
java-version: '8'
java-version: '17'
distribution: 'zulu'
cache: maven

@@ -66,8 +66,8 @@ jobs:
strategy:
fail-fast: false
matrix:
#jdk: [8, 11, 17]
jdk: [8]
# jdk: [11, 17]
jdk: [17]
it: [HighAvailability, MultiStageQuery, Catalog, BatchIndex, MultiStageQueryWithMM, InputSource, InputFormat, Security, Query]
#indexer: [indexer, middleManager]
indexer: [middleManager]
@@ -86,8 +86,8 @@ jobs:
uses: ./.github/workflows/reusable-revised-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 11
build_jdk: 17
runtime_jdk: 21.0.4
use_indexer: middleManager
script: ./it.sh github S3DeepStorage
it: S3DeepStorage
@@ -103,8 +103,8 @@ jobs:
uses: ./.github/workflows/reusable-revised-its.yml
if: ${{ inputs.BACKWARD_COMPATIBILITY_IT_ENABLED == 'true' && (needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true') }}
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
use_indexer: middleManager
script: ./it.sh github BackwardCompatibilityMain
it: BackwardCompatibilityMain

@@ -51,8 +51,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
testing_groups: -Dgroups=${{ matrix.testing_group }}
override_config_path: ./environment-configs/test-groups/prepopulated-data
use_indexer: middleManager
@@ -67,8 +67,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
testing_groups: -Dgroups=${{ matrix.testing_group }}
use_indexer: indexer
group: ${{ matrix.testing_group }}
@@ -82,8 +82,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
testing_groups: -Dgroups=${{ matrix.testing_group }}
use_indexer: middleManager
override_config_path: ./environment-configs/test-groups/prepopulated-data
@@ -94,7 +94,7 @@ jobs:
strategy:
fail-fast: false
matrix:
jdk: [8, 17, 21]
jdk: [11, 17, 21]
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
@@ -115,8 +115,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
testing_groups: -Dgroups=shuffle-deep-store
use_indexer: ${{ matrix.indexer }}
override_config_path: ./environment-configs/test-groups/shuffle-deep-store
@@ -127,8 +127,8 @@ jobs:
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
testing_groups: -Dgroups=custom-coordinator-duties
use_indexer: middleManager
override_config_path: ./environment-configs/test-groups/custom-coordinator-duties
@@ -136,7 +136,7 @@ jobs:

integration-k8s-leadership-tests:
needs: changes
name: (Compile=openjdk8, Run=openjdk8, Cluster Build On K8s) ITNestedQueryPushDownTest integration test
name: (Compile=openjdk17, Run=openjdk17, Cluster Build On K8s) ITNestedQueryPushDownTest integration test
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
runs-on: ubuntu-22.04
env:
@@ -154,7 +154,7 @@ jobs:
- name: setup java
uses: actions/setup-java@v4
with:
java-version: '8'
java-version: '17'
distribution: 'zulu'

# the build step produces SNAPSHOT artifacts into the local maven repository,
@@ -195,8 +195,8 @@ jobs:
indexer: [middleManager, indexer]
uses: ./.github/workflows/reusable-standard-its.yml
with:
build_jdk: 8
runtime_jdk: 8
build_jdk: 17
runtime_jdk: 17
testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties,centralized-datasource-schema,cds-task-schema-publish-disabled,cds-coordinator-metadata-query-disabled
use_indexer: ${{ matrix.indexer }}
group: other

@@ -42,7 +42,7 @@ jobs:
fail-fast: false
matrix:
# Use JDK 21.0.4 to work around https://github.com/apache/druid/issues/17429
java: [ '8', '11', '17', '21.0.4' ]
java: [ '11', '17', '21.0.4' ]
runs-on: ubuntu-latest
steps:
- name: checkout branch
@@ -64,7 +64,7 @@ jobs:

- name: script checks
# who watches the watchers?
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ./check_test_suite_test.py

- name: (openjdk17) strict compilation
@@ -74,43 +74,43 @@ jobs:
run: ${MVN} clean -DstrictCompile compile test-compile --fail-at-end ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS}

- name: maven install
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: |
echo 'Running Maven install...' &&
${MVN} clean install -q -ff -pl '!distribution' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} -T1C &&
${MVN} install -q -ff -pl 'distribution' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS}

- name: checkstyle
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ${MVN} checkstyle:checkstyle --fail-at-end

- name: license checks
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ./.github/scripts/license_checks_script.sh

- name: analyze dependencies
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: |
./.github/scripts/analyze_dependencies_script.sh

- name: animal sniffer checks
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ${MVN} animal-sniffer:check --fail-at-end

- name: enforcer checks
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ${MVN} enforcer:enforce --fail-at-end

- name: forbidden api checks
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ${MVN} forbiddenapis:check forbiddenapis:testCheck --fail-at-end

- name: pmd checks
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ${MVN} pmd:check --fail-at-end # TODO: consider adding pmd:cpd-check

- name: spotbugs checks
if: ${{ matrix.java == '8' }}
if: ${{ matrix.java == '17' }}
run: ${MVN} spotbugs:check --fail-at-end -pl '!benchmarks'

openrewrite:
@@ -122,7 +122,7 @@ jobs:
- uses: actions/setup-java@v4
with:
distribution: 'zulu'
java-version: '8'
java-version: '17'
cache: 'maven'

- name: maven install

@@ -80,7 +80,7 @@ jobs:
fail-fast: false
matrix:
# Use JDK 21.0.4 to work around https://github.com/apache/druid/issues/17429
jdk: [ '8', '11', '17', '21.0.4' ]
jdk: [ '11', '17', '21.0.4' ]
runs-on: ubuntu-latest
steps:
- name: Checkout branch
@@ -162,7 +162,7 @@ jobs:
fail-fast: false
matrix:
# Use JDK 21.0.4 to work around https://github.com/apache/druid/issues/17429
jdk: [ '11', '17', '21.0.4' ]
jdk: [ '11', '21.0.4' ]
name: "unit tests (jdk${{ matrix.jdk }}, sql-compat=true)"
uses: ./.github/workflows/unit-tests.yml
needs: unit-tests
@@ -176,11 +176,11 @@ jobs:
fail-fast: false
matrix:
sql_compatibility: [ false, true ]
name: "unit tests (jdk8, sql-compat=${{ matrix.sql_compatibility }})"
name: "unit tests (jdk17, sql-compat=${{ matrix.sql_compatibility }})"
uses: ./.github/workflows/unit-tests.yml
needs: build
with:
jdk: 8
jdk: 17
sql_compatibility: ${{ matrix.sql_compatibility }}

standard-its:

@@ -84,7 +84,7 @@
<resource url="http://maven.apache.org/ASSEMBLY/2.0.0" location="$PROJECT_DIR$/.idea/xml-schemas/assembly-2.0.0.xsd" />
<resource url="http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd" location="$PROJECT_DIR$/.idea/xml-schemas/svg11.dtd" />
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="false" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<component name="ProjectRootManager" version="2" languageLevel="JDK_17" default="false" project-jdk-name="17" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/classes" />
</component>
</project>

@@ -106,7 +106,7 @@ Find articles written by community members and a calendar of upcoming events on

### Building from source

Please note that JDK 8 or JDK 11 is required to build Druid.
Please note that JDK 11 or JDK 17 is required to build Druid.

See the latest [build guide](https://druid.apache.org/docs/latest/development/build.html) for instructions on building Apache Druid from source.

@@ -64,6 +64,7 @@
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-multibindings</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
@@ -239,7 +240,6 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<jmh.version>1.21</jmh.version>
<javac.target>1.8</javac.target>
<uberjar.name>benchmarks</uberjar.name>
</properties>

@@ -37,6 +37,7 @@ import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryQueryToolChest;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.column.ColumnType;
@@ -115,7 +116,8 @@ public class GroupByDeserializationBenchmark
}
},
null,
null
null,
new GroupByStatsProvider()
);

decoratedMapper = groupByQueryQueryToolChest.decorateObjectMapper(undecoratedMapper, sqlQuery);

@@ -55,6 +55,7 @@ import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryQueryToolChest;
import org.apache.druid.query.groupby.GroupByQueryRunnerFactory;
import org.apache.druid.query.groupby.GroupByResourcesReservationPool;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.GroupingEngine;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
@@ -373,6 +374,7 @@ public class GroupByTypeInterfaceBenchmark
};

final Supplier<GroupByQueryConfig> configSupplier = Suppliers.ofInstance(config);
final GroupByStatsProvider groupByStatsProvider = new GroupByStatsProvider();
final GroupByResourcesReservationPool groupByResourcesReservationPool =
new GroupByResourcesReservationPool(mergePool, config);
final GroupingEngine groupingEngine = new GroupingEngine(
@@ -381,7 +383,8 @@ public class GroupByTypeInterfaceBenchmark
groupByResourcesReservationPool,
TestHelper.makeJsonMapper(),
new ObjectMapper(new SmileFactory()),
QueryBenchmarkUtil.NOOP_QUERYWATCHER
QueryBenchmarkUtil.NOOP_QUERYWATCHER,
groupByStatsProvider
);

factory = new GroupByQueryRunnerFactory(

@@ -79,6 +79,7 @@ import org.apache.druid.query.groupby.GroupByQueryQueryToolChest;
import org.apache.druid.query.groupby.GroupByQueryRunnerFactory;
import org.apache.druid.query.groupby.GroupByQueryRunnerTest;
import org.apache.druid.query.groupby.GroupByResourcesReservationPool;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.GroupingEngine;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.planning.DataSourceAnalysis;
@@ -343,6 +344,7 @@ public class CachingClusteredClientBenchmark
bufferSupplier,
processingConfig.getNumMergeBuffers()
);
final GroupByStatsProvider groupByStatsProvider = new GroupByStatsProvider();
final GroupByResourcesReservationPool groupByResourcesReservationPool =
new GroupByResourcesReservationPool(mergeBufferPool, config);
final GroupingEngine groupingEngine = new GroupingEngine(
@@ -351,7 +353,8 @@ public class CachingClusteredClientBenchmark
groupByResourcesReservationPool,
mapper,
mapper,
QueryRunnerTestHelper.NOOP_QUERYWATCHER
QueryRunnerTestHelper.NOOP_QUERYWATCHER,
groupByStatsProvider
);
final GroupByQueryQueryToolChest toolChest = new GroupByQueryQueryToolChest(groupingEngine, groupByResourcesReservationPool);
return new GroupByQueryRunnerFactory(groupingEngine, toolChest, bufferPool);

@@ -66,6 +66,7 @@ import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryQueryToolChest;
import org.apache.druid.query.groupby.GroupByQueryRunnerFactory;
import org.apache.druid.query.groupby.GroupByResourcesReservationPool;
import org.apache.druid.query.groupby.GroupByStatsProvider;
import org.apache.druid.query.groupby.GroupingEngine;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
@@ -490,6 +491,7 @@ public class GroupByBenchmark
};

final Supplier<GroupByQueryConfig> configSupplier = Suppliers.ofInstance(config);
final GroupByStatsProvider groupByStatsProvider = new GroupByStatsProvider();
final GroupByResourcesReservationPool groupByResourcesReservationPool =
new GroupByResourcesReservationPool(mergePool, config);
final GroupingEngine groupingEngine = new GroupingEngine(
@@ -498,7 +500,8 @@ public class GroupByBenchmark
groupByResourcesReservationPool,
TestHelper.makeJsonMapper(),
new ObjectMapper(new SmileFactory()),
QueryBenchmarkUtil.NOOP_QUERYWATCHER
QueryBenchmarkUtil.NOOP_QUERYWATCHER,
groupByStatsProvider
);

factory = new GroupByQueryRunnerFactory(

@@ -22,7 +22,7 @@ import sys
# this script does some primitive examination of git diff to determine if a test suite needs to be run or not

# these jobs should always be run, no matter what
always_run_jobs = ['license checks', '(openjdk8) packaging check', '(openjdk11) packaging check']
always_run_jobs = ['license checks', '(openjdk17) packaging check']

# ignore changes to these files completely since they don't impact CI, if the changes are only to these files then all
# of CI can be skipped. however, jobs which are always run will still be run even if only these files are changed

@@ -29,5 +29,4 @@ This ruleset defines the PMD rules for the Apache Druid project.

<rule ref="category/java/codestyle.xml/UnnecessaryImport" />
<rule ref="category/java/codestyle.xml/TooManyStaticImports" />
<rule ref="category/java/codestyle.xml/UnnecessaryFullyQualifiedName"/>
</ruleset>

@@ -137,4 +137,15 @@
<Bug pattern="SWL_SLEEP_WITH_LOCK_HELD"/>
<Bug pattern="UL_UNRELEASED_LOCK_EXCEPTION_PATH"/>
<Bug pattern="URF_UNREAD_FIELD"/>
<!-- The following patterns have been excluded as part of upgrading to Java 17 as there were 100s of occurrences.
We should revisit these later. -->
<Bug pattern="CT_CONSTRUCTOR_THROW"/>
<Bug pattern="SING_SINGLETON_HAS_NONPRIVATE_CONSTRUCTOR"/>
<Bug pattern="DCN_NULLPOINTER_EXCEPTION"/>
<Bug pattern="SING_SINGLETON_INDIRECTLY_IMPLEMENTS_CLONEABLE"/>
<Bug pattern="MS_EXPOSE_REP"/>
<Bug pattern="PA_PUBLIC_PRIMITIVE_ATTRIBUTE"/>
<Bug pattern="EI_EXPOSE_STATIC_REP2"/>
<Bug pattern="SS_SHOULD_BE_STATIC"/>
<Bug pattern="SING_SINGLETON_IMPLEMENTS_SERIALIZABLE"/>
</FindBugsFilter>

@@ -23,7 +23,7 @@ ARG JDK_VERSION=17
# This is because it's not able to build the distribution on arm64 due to dependency problem of web-console. See: https://github.com/apache/druid/issues/13012
# Since only java jars are shipped in the final image, it's OK to build the distribution on x64.
# Once the web-console dependency problem is resolved, we can remove the --platform directive.
FROM --platform=linux/amd64 maven:3.8.6-jdk-11-slim as builder
FROM --platform=linux/amd64 maven:3.8.4-openjdk-17-slim as builder

# Rebuild from source in this stage
# This can be unset if the tarball was already built outside of Docker

@@ -195,9 +195,9 @@ and `druid.tlsPort` properties on each service. Please see `Configuration` secti

Druid uses Jetty as an embedded web server. To learn more about TLS/SSL, certificates, and related concepts in Jetty, including explanations of the configuration settings below, see "Configuring SSL/TLS KeyStores" in the [Jetty Operations Guide](https://www.eclipse.org/jetty/documentation.php).

For information about TLS/SSL support in Java in general, see the [Java Secure Socket Extension (JSSE) Reference Guide](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html).
For information about TLS/SSL support in Java in general, see the [Java Secure Socket Extension (JSSE) Reference Guide](https://docs.oracle.com/en/java/javase/11/security/java-secure-socket-extension-jsse-reference-guide.html).
The [Java Cryptography Architecture
Standard Algorithm Name Documentation for JDK 8](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html) lists all possible
Standard Algorithm Name Documentation for JDK 11](https://docs.oracle.com/en/java/javase/11/docs/specs/security/standard-names.html) lists all possible
values for the following properties, among others provided by the Java implementation.

|Property|Description|Default|Required|
@@ -230,7 +230,7 @@ These properties apply to the SSLContext that will be provided to the internal H
|`druid.client.https.trustStoreAlgorithm`|Algorithm to be used by TrustManager to validate certificate chains|`javax.net.ssl.TrustManagerFactory.getDefaultAlgorithm()`|no|
|`druid.client.https.trustStorePassword`|The [Password Provider](../operations/password-provider.md) or String password for the Trust Store.|none|yes|

This [document](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html) lists all the possible
This [document](https://docs.oracle.com/en/java/javase/11/docs/specs/security/standard-names.html) lists all the possible
values for the above mentioned configs among others provided by Java implementation.

### Authentication and authorization
@@ -408,6 +408,7 @@ Metric monitoring is an essential part of Druid operations. The following monito
|`org.apache.druid.server.metrics.TaskSlotCountStatsMonitor`|Reports metrics about task slot usage per emission period.|
|`org.apache.druid.server.metrics.WorkerTaskCountStatsMonitor`|Reports how many ingestion tasks are currently running/pending/waiting, the number of successful/failed tasks, and metrics about task slot usage for the reporting worker, per emission period. Only supported by Middle Manager node types.|
|`org.apache.druid.server.metrics.ServiceStatusMonitor`|Reports a heartbeat for the service.|
|`org.apache.druid.server.metrics.GroupByStatsMonitor`|Report metrics for groupBy queries like disk and merge buffer utilization. |

For example, you might configure monitors on all services for system and JVM information within `common.runtime.properties` as follows:

@@ -23,9 +23,9 @@ title: "Simple SSLContext Provider Module"
-->


This Apache Druid module contains a simple implementation of [SSLContext](http://docs.oracle.com/javase/8/docs/api/javax/net/ssl/SSLContext.html)
This Apache Druid module contains a simple implementation of [SSLContext](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/javax/net/ssl/SSLContext.html)
that will be injected to be used with HttpClient that Druid processes use internally to communicate with each other. To learn more about
Java's SSL support, please refer to [this](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html) guide.
Java's SSL support, please refer to [this](https://docs.oracle.com/en/java/javase/11/security/java-secure-socket-extension-jsse-reference-guide.html) guide.


|Property|Description|Default|Required|
@@ -48,5 +48,5 @@ The following table contains optional parameters for supporting client certifica
|`druid.client.https.keyManagerPassword`|The [Password Provider](../../operations/password-provider.md) or String password for the Key Manager.|none|no|
|`druid.client.https.validateHostnames`|Validate the hostname of the server. This should not be disabled unless you are using [custom TLS certificate checks](../../operations/tls-support.md) and know that standard hostname validation is not needed.|true|no|

This [document](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html) lists all the possible
This [document](https://docs.oracle.com/en/java/javase/11/docs/specs/security/standard-names.html) lists all the possible
values for the above mentioned configs among others provided by Java implementation.

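As a rough illustration of what such an SSLContext amounts to, a minimal sketch that builds one from a trust store using only the JSSE classes linked above; the path, password, and protocol version are placeholders, not values from this module:

```java
import java.io.FileInputStream;
import java.security.KeyStore;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManagerFactory;

public class SimpleSslContextSketch
{
  public static void main(String[] args) throws Exception
  {
    // Load the trust store that holds the server certificates to be validated.
    KeyStore trustStore = KeyStore.getInstance("jks");
    try (FileInputStream in = new FileInputStream("/path/to/truststore.jks")) {
      trustStore.load(in, "truststore-password".toCharArray());
    }

    // Build trust managers with the default algorithm (the trustStoreAlgorithm default above).
    TrustManagerFactory tmf = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm());
    tmf.init(trustStore);

    // Create the SSLContext an HTTP client can use to validate server certificates.
    SSLContext sslContext = SSLContext.getInstance("TLSv1.2");
    sslContext.init(null, tmf.getTrustManagers(), null);

    System.out.println("Protocol: " + sslContext.getProtocol());
  }
}
```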
@@ -184,7 +184,7 @@ Sample specs:
|uris|JSON array of URIs where S3 objects to be ingested are located.|None|`uris` or `prefixes` or `objects` must be set|
|prefixes|JSON array of URI prefixes for the locations of S3 objects to be ingested. Empty objects starting with one of the given prefixes will be skipped.|None|`uris` or `prefixes` or `objects` must be set|
|objects|JSON array of S3 Objects to be ingested.|None|`uris` or `prefixes` or `objects` must be set|
|objectGlob|A glob for the object part of the S3 URI. In the URI `s3://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `s3://foo/bar/file.json`, because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-).|None|no|
|objectGlob|A glob for the object part of the S3 URI. In the URI `s3://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `s3://foo/bar/file.json`, because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/nio/file/FileSystem.html#getPathMatcher(java.lang.String)).|None|no|
|systemFields|JSON array of system fields to return as part of input rows. Possible values: `__file_uri` (S3 URI starting with `s3://`), `__file_bucket` (S3 bucket), and `__file_path` (S3 object key).|None|no|
| endpointConfig |Config for overriding the default S3 endpoint and signing region. This would allow ingesting data from a different S3 store. Please see [s3 config](../development/extensions-core/s3.md#connecting-to-s3-configuration) for more information.|None|No (defaults will be used if not given)
| clientConfig |S3 client properties for the overridden s3 endpoint. This is used in conjunction with `endPointConfig`. Please see [s3 config](../development/extensions-core/s3.md#connecting-to-s3-configuration) for more information.|None|No (defaults will be used if not given)
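The `objectGlob` rows above defer to `FileSystem#getPathMatcher` glob syntax; a minimal standalone sketch (the path is hypothetical) shows why `*.json` fails to match an object part that contains a slash while `**.json` succeeds:

```java
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.nio.file.PathMatcher;

public class ObjectGlobSketch
{
  public static void main(String[] args)
  {
    // The "object part" of s3://foo/bar/file.json.
    Path objectPart = Path.of("bar/file.json");

    PathMatcher singleLevel = FileSystems.getDefault().getPathMatcher("glob:*.json");
    PathMatcher recursive = FileSystems.getDefault().getPathMatcher("glob:**.json");

    System.out.println(singleLevel.matches(objectPart)); // false: '*' does not cross the '/' separator
    System.out.println(recursive.matches(objectPart));   // true:  '**' matches across directory boundaries
  }
}
```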
@@ -289,7 +289,7 @@ Sample specs:
|uris|JSON array of URIs where Google Cloud Storage objects to be ingested are located.|None|`uris` or `prefixes` or `objects` must be set|
|prefixes|JSON array of URI prefixes for the locations of Google Cloud Storage objects to be ingested. Empty objects starting with one of the given prefixes will be skipped.|None|`uris` or `prefixes` or `objects` must be set|
|objects|JSON array of Google Cloud Storage objects to be ingested.|None|`uris` or `prefixes` or `objects` must be set|
|objectGlob|A glob for the object part of the S3 URI. In the URI `s3://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `s3://foo/bar/file.json`, because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-).|None|no|
|objectGlob|A glob for the object part of the S3 URI. In the URI `s3://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `s3://foo/bar/file.json`, because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/nio/file/FileSystem.html#getPathMatcher(java.lang.String)).|None|no|

Note that the Google Cloud Storage input source will skip all empty objects only when `prefixes` is specified.

@@ -377,7 +377,7 @@ Sample specs:
|uris|JSON array of URIs where the Azure objects to be ingested are located. Use this format: `azureStorage://STORAGE_ACCOUNT/CONTAINER/PATH_TO_FILE`|None|One of the following must be set:`uris`, `prefixes`, or `objects`.|
|prefixes|JSON array of URI prefixes for the locations of Azure objects to ingest. Use this format`azureStorage://STORAGE_ACCOUNT/CONTAINER/PREFIX`. Empty objects starting with any of the given prefixes are skipped.|None|One of the following must be set:`uris`, `prefixes`, or `objects`.|
|objects|JSON array of Azure objects to ingest.|None|One of the following must be set:`uris`, `prefixes`, or `objects`.|
|objectGlob|A glob for the object part of the Azure URI. In the URI `azureStorage://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `azureStorage://foo/bar/file.json` because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-).|None|no|
|objectGlob|A glob for the object part of the Azure URI. In the URI `azureStorage://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `azureStorage://foo/bar/file.json` because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/nio/file/FileSystem.html#getPathMatcher(java.lang.String)).|None|no|
|systemFields|JSON array of system fields to return as part of input rows. Possible values: `__file_uri` (Azure blob URI starting with `azureStorage://`), `__file_bucket` (Azure bucket), and `__file_path` (Azure object path).|None|no|
|properties|Properties object for overriding the default Azure configuration. See below for more information.|None|No (defaults will be used if not given)|

@@ -471,7 +471,7 @@ Sample specs:
|uris|JSON array of URIs where the Azure objects to be ingested are located, in the form `azure://<container>/<path-to-file>`|None|`uris` or `prefixes` or `objects` must be set|
|prefixes|JSON array of URI prefixes for the locations of Azure objects to ingest, in the form `azure://<container>/<prefix>`. Empty objects starting with one of the given prefixes are skipped.|None|`uris` or `prefixes` or `objects` must be set|
|objects|JSON array of Azure objects to ingest.|None|`uris` or `prefixes` or `objects` must be set|
|objectGlob|A glob for the object part of the Azure URI. In the URI `azure://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `azure://foo/bar/file.json`, because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-).|None|no|
|objectGlob|A glob for the object part of the Azure URI. In the URI `azure://foo/bar/file.json`, the glob is applied to `bar/file.json`.<br /><br />The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `azure://foo/bar/file.json`, because the object part is `bar/file.json`, and the`*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.<br /><br />For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/nio/file/FileSystem.html#getPathMatcher(java.lang.String)).|None|no|
|systemFields|JSON array of system fields to return as part of input rows. Possible values: `__file_uri` (Azure blob URI starting with `azure://`), `__file_bucket` (Azure bucket), and `__file_path` (Azure object path).|None|no|

Note that the Azure input source skips all empty objects only when `prefixes` is specified.

@@ -143,41 +143,54 @@ Similarly, you can use `metrics-.*` as the value for `topicPattern` if you want

#### Consumer properties

Consumer properties control how a supervisor reads and processes event messages from a Kafka stream. For more information about consumers, refer to the [Apache Kafka documentation](https://kafka.apache.org/documentation/#consumerconfigs).
Consumer properties control how a supervisor reads and processes event messages from a Kafka stream. For more information about consumer configuration and advanced use cases, refer to the [Kafka documentation](https://kafka.apache.org/documentation/#consumerconfigs).

The `consumerProperties` object must contain a `bootstrap.servers` property with a list of Kafka brokers in the form: `<BROKER_1>:<PORT_1>,<BROKER_2>:<PORT_2>,...`.
By default, `isolation.level` is set to `read_committed`.
You must include `bootstrap.servers` in consumer properties with a list of Kafka brokers in the format `<BROKER_1>:<PORT_1>,<BROKER_2>:<PORT_2>,...`.
In some cases, you may need to retrieve consumer properties at runtime. For example, when `bootstrap.servers` is unknown or not static.

If you use older versions of Kafka servers without transactions support or don't want Druid to consume only committed transactions, set `isolation.level` to `read_uncommitted`. If you need Druid to consume older versions of Kafka, make sure offsets are sequential, since there is no offset gap check in Druid.
The `isolation.level` property in `consumerProperties` determines how Druid reads messages written transactionally.
If you use older versions of Kafka servers without transaction support or you don't want Druid to consume only committed transactions, set `isolation.level` to `read_uncommitted`.
With `read_uncommitted`, which is the default setting, Druid reads all messages, including aborted transactional messages.
Make sure offsets are sequential, since there is no offset gap check in Druid.
For Druid to consume only committed transactional messages, set `isolation.level` to `read_committed`.

If your Kafka cluster enables consumer-group based ACLs, you can set `group.id` in `consumerProperties` to override the default auto generated group ID.
If your Kafka cluster enables consumer group ACLs, you can set `group.id` in `consumerProperties` to override the default auto generated group ID.

In some cases, you may need to fetch consumer properties at runtime. For example, when `bootstrap.servers` is not known upfront or is not static. To enable SSL connections, you must provide passwords for `keystore`, `truststore`, and `key` secretly. You can provide configurations at runtime with a dynamic config provider implementation like the environment variable config provider that comes with Druid. For more information, see [Dynamic config provider](../operations/dynamic-config-provider.md).
To enable SSL connections, you must provide passwords for `keystore`, `truststore`, and `key` confidentially. You can specify these settings in the `jaas.conf` login configuration file or in `consumerProperties` with `sasl.jaas.config`.
To protect sensitive information, use the [environment variable dynamic config provider](../operations/dynamic-config-provider.md#environment-variable-dynamic-config-provider) to store credentials in system environment variables instead of plain text.
Although you can also use the [password provider](../operations/password-provider.md) interface to specify SSL configuration for Kafka ingestion, consider using the dynamic config provider as this feature is deprecated.

For example, if you are using SASL and SSL with Kafka, set the following environment variables for the Druid user on the machines running the Overlord and the Peon services:
For example, when using SASL and SSL with Kafka, set the following environment variables for the Druid user on machines running the Overlord and Peon services. Replace the values to match your environment configurations.

```
export KAFKA_JAAS_CONFIG="org.apache.kafka.common.security.plain.PlainLoginModule required username='admin_user' password='admin_password';"
export KAFKA_JAAS_CONFIG="org.apache.kafka.common.security.plain.PlainLoginModule required username='accesskey' password='secret key';"
export SSL_KEY_PASSWORD=mysecretkeypassword
export SSL_KEYSTORE_PASSWORD=mysecretkeystorepassword
export SSL_TRUSTSTORE_PASSWORD=mysecrettruststorepassword
```

When you define the consumer properties in the supervisor spec, use the dynamic config provider to refer to the environment variables:

```json
"druid.dynamic.config.provider": {
"type": "environment",
"variables": {
"sasl.jaas.config": "KAFKA_JAAS_CONFIG",
"ssl.key.password": "SSL_KEY_PASSWORD",
"ssl.keystore.password": "SSL_KEYSTORE_PASSWORD",
"ssl.truststore.password": "SSL_TRUSTSTORE_PASSWORD"
"consumerProperties": {
"bootstrap.servers": "localhost:9092",
"security.protocol": "SASL_SSL",
"sasl.mechanism": "PLAIN",
"ssl.keystore.location": "/opt/kafka/config/kafka01.keystore.jks",
"ssl.truststore.location": "/opt/kafka/config/kafka.truststore.jks",
"druid.dynamic.config.provider": {
"type": "environment",
"variables": {
"sasl.jaas.config": "KAFKA_JAAS_CONFIG",
"ssl.key.password": "SSL_KEY_PASSWORD",
"ssl.keystore.password": "SSL_KEYSTORE_PASSWORD",
"ssl.truststore.password": "SSL_TRUSTSTORE_PASSWORD"
}
}
}
```

Verify that you've changed the values for all configurations to match your own environment. In the Druid data loader interface, you can use the environment variable config provider syntax in the **Consumer properties** field on the **Connect tab**. When connecting to Kafka, Druid replaces the environment variables with their corresponding values.

You can provide SSL connections with [Password provider](../operations/password-provider.md) interface to define the `keystore`, `truststore`, and `key`, but this feature is deprecated.
When connecting to Kafka, Druid replaces the environment variables with their corresponding values.

#### Idle configuration

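For illustration only, the sketch below approximates the consumer configuration that results once the environment variables above are substituted into `consumerProperties`; it is not how Druid's dynamic config provider is implemented, and the broker address and keystore paths are just the sample values from the spec above.

```java
import java.util.Properties;

public class ResolvedConsumerPropertiesSketch
{
  public static void main(String[] args)
  {
    Properties consumerProperties = new Properties();
    consumerProperties.setProperty("bootstrap.servers", "localhost:9092");
    consumerProperties.setProperty("security.protocol", "SASL_SSL");
    consumerProperties.setProperty("sasl.mechanism", "PLAIN");
    consumerProperties.setProperty("ssl.keystore.location", "/opt/kafka/config/kafka01.keystore.jks");
    consumerProperties.setProperty("ssl.truststore.location", "/opt/kafka/config/kafka.truststore.jks");

    // The dynamic config provider resolves each variable name to the value of the
    // corresponding environment variable at connection time.
    consumerProperties.setProperty("sasl.jaas.config", System.getenv("KAFKA_JAAS_CONFIG"));
    consumerProperties.setProperty("ssl.key.password", System.getenv("SSL_KEY_PASSWORD"));
    consumerProperties.setProperty("ssl.keystore.password", System.getenv("SSL_KEYSTORE_PASSWORD"));
    consumerProperties.setProperty("ssl.truststore.password", System.getenv("SSL_TRUSTSTORE_PASSWORD"));

    consumerProperties.forEach((key, value) -> System.out.println(key + "=" + value));
  }
}
```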
@@ -27,7 +27,11 @@ a Java runtime for Druid.

## Selecting a Java runtime

Druid fully supports Java 8u92+, Java 11, and Java 17. The project team recommends Java 17.
Druid fully supports Java 11 and Java 17. The project team recommends Java 17.

:::info
Note: Starting with Apache Druid 32.0.0, support for Java 8 has been removed.
:::

The project team recommends using an OpenJDK-based Java distribution. There are many free and actively-supported
distributions available, including

@@ -62,7 +62,6 @@ Most metric values reset each emission period, as specified in `druid.monitoring
|`query/failed/count`|Number of failed queries.|This metric is only available if the `QueryCountStatsMonitor` module is included.| |
|`query/interrupted/count`|Number of queries interrupted due to cancellation.|This metric is only available if the `QueryCountStatsMonitor` module is included.| |
|`query/timeout/count`|Number of timed out queries.|This metric is only available if the `QueryCountStatsMonitor` module is included.| |
|`mergeBuffer/pendingRequests`|Number of requests waiting to acquire a batch of buffers from the merge buffer pool.|This metric is only available if the `QueryCountStatsMonitor` module is included.| |
|`query/segments/count`|This metric is not enabled by default. See the `QueryMetrics` Interface for reference regarding enabling this metric. Number of segments that will be touched by the query. In the broker, it makes a plan to distribute the query to realtime tasks and historicals based on a snapshot of segment distribution state. If there are some segments moved after this snapshot is created, certain historicals and realtime tasks can report those segments as missing to the broker. The broker will resend the query to the new servers that serve those segments after move. In this case, those segments can be counted more than once in this metric.||Varies|
|`query/priority`|Assigned lane and priority, only if Laning strategy is enabled. Refer to [Laning strategies](../configuration/index.md#laning-strategies)|`lane`, `dataSource`, `type`|0|
|`sqlQuery/time`|Milliseconds taken to complete a SQL query.|`id`, `nativeQueryIds`, `dataSource`, `remoteAddress`, `success`, `engine`|< 1s|
@@ -103,7 +102,6 @@ Most metric values reset each emission period, as specified in `druid.monitoring
|`query/failed/count`|Number of failed queries.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
|`query/interrupted/count`|Number of queries interrupted due to cancellation.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
|`query/timeout/count`|Number of timed out queries.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
|`mergeBuffer/pendingRequests`|Number of requests waiting to acquire a batch of buffers from the merge buffer pool.|This metric is only available if the `QueryCountStatsMonitor` module is included.||

### Real-time

@@ -119,7 +117,18 @@ Most metric values reset each emission period, as specified in `druid.monitoring
|`query/failed/count`|Number of failed queries.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
|`query/interrupted/count`|Number of queries interrupted due to cancellation.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
|`query/timeout/count`|Number of timed out queries.|This metric is only available if the `QueryCountStatsMonitor` module is included.||
|`mergeBuffer/pendingRequests`|Number of requests waiting to acquire a batch of buffers from the merge buffer pool.|This metric is only available if the `QueryCountStatsMonitor` module is included.||

### GroupBy query metrics

These metrics are reported from broker, historical and real-time nodes

|`mergeBuffer/pendingRequests`|Number of requests waiting to acquire a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Should be ideally 0, though a higher number isn't representative of a problem.|
|`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.|
|`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.|
|`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies|
|`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies|
|`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies|
|`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in bytes.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies|

### Jetty

@@ -37,10 +37,10 @@ Apache Druid uses Jetty as its embedded web server.

To get familiar with TLS/SSL, along with related concepts like keys and certificates,
read [Configuring Secure Protocols](https://www.eclipse.org/jetty/documentation/jetty-12/operations-guide/index.html#og-protocols-ssl) in the Jetty documentation.
To get more in-depth knowledge of TLS/SSL support in Java in general, refer to the [Java Secure Socket Extension (JSSE) Reference Guide](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html).
To get more in-depth knowledge of TLS/SSL support in Java in general, refer to the [Java Secure Socket Extension (JSSE) Reference Guide](https://docs.oracle.com/en/java/javase/11/security/java-secure-socket-extension-jsse-reference-guide.html).
The [Class SslContextFactory](https://www.eclipse.org/jetty/javadoc/jetty-11/org/eclipse/jetty/util/ssl/SslContextFactory.html)
reference doc can help in understanding TLS/SSL configurations listed below. Finally, [Java Cryptography Architecture
Standard Algorithm Name Documentation for JDK 8](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html) lists all possible
Standard Algorithm Name Documentation for JDK 11](https://docs.oracle.com/en/java/javase/11/docs/specs/security/standard-names.html) lists all possible
values for the configs below, among others provided by Java implementation.

|Property|Description|Default|Required|
@@ -79,7 +79,7 @@ The following table contains non-mandatory advanced configuration options, use c
## Internal communication over TLS

Whenever possible Druid processes will use HTTPS to talk to each other. To enable this communication Druid's HttpClient needs to
be configured with a proper [SSLContext](http://docs.oracle.com/javase/8/docs/api/javax/net/ssl/SSLContext.html) that is able
be configured with a proper [SSLContext](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/javax/net/ssl/SSLContext.html) that is able
to validate the Server Certificates, otherwise communication will fail.

Since, there are various ways to configure SSLContext, by default, Druid looks for an instance of SSLContext Guice binding

@@ -255,7 +255,7 @@ For a regular dimension, it assumes the string is formatted in
[ISO-8601 date and time format](https://en.wikipedia.org/wiki/ISO_8601).

* `format` : date time format for the resulting dimension value, in [Joda Time DateTimeFormat](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html), or null to use the default ISO8601 format.
* `locale` : locale (language and country) to use, given as a [IETF BCP 47 language tag](http://www.oracle.com/technetwork/java/javase/java8locales-2095355.html#util-text), e.g. `en-US`, `en-GB`, `fr-FR`, `fr-CA`, etc.
* `locale` : locale (language and country) to use, given as a [IETF BCP 47 language tag](https://www.oracle.com/java/technologies/javase/jdk11-suported-locales.html#util-text), e.g. `en-US`, `en-GB`, `fr-FR`, `fr-CA`, etc.
* `timeZone` : time zone to use in [IANA tz database format](http://en.wikipedia.org/wiki/List_of_tz_database_time_zones), e.g. `Europe/Berlin` (this can possibly be different than the aggregation time-zone)
* `granularity` : [granularity](granularities.md) to apply before formatting, or omit to not apply any granularity.
* `asMillis` : boolean value, set to true to treat input strings as millis rather than ISO8601 strings. Additionally, if `format` is null or not specified, output will be in millis rather than ISO8601.

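A minimal sketch of how the `format`, `locale`, and `timeZone` options in the list above map onto Joda-Time (the library the `format` option links to); the pattern, locale, and zone here are arbitrary examples, and joda-time must be on the classpath:

```java
import java.util.Locale;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

public class TimeFormatSketch
{
  public static void main(String[] args)
  {
    // format, locale, and timeZone roughly correspond to these Joda-Time calls.
    DateTimeFormatter formatter = DateTimeFormat
        .forPattern("EEEE")                             // format
        .withLocale(Locale.forLanguageTag("fr-FR"))     // locale
        .withZone(DateTimeZone.forID("Europe/Berlin")); // timeZone

    // Prints "lundi": the instant falls on a Monday in the Europe/Berlin zone.
    System.out.println(formatter.print(new DateTime("2024-01-01T00:00:00Z")));
  }
}
```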
@@ -439,7 +439,7 @@ The regular expression filter is similar to the selector filter, but using regul
| -------- | ----------- | -------- |
| `type` | Must be "regex".| Yes |
| `dimension` | Input column or virtual column name to filter on. | Yes |
| `pattern` | String pattern to match - any standard [Java regular expression](http://docs.oracle.com/javase/6/docs/api/java/util/regex/Pattern.html). | Yes |
| `pattern` | String pattern to match - any standard [Java regular expression](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html). | Yes |
| `extractionFn` | [Extraction function](./dimensionspecs.md#extraction-functions) to apply to `dimension` prior to value matching. See [filtering with extraction functions](#filtering-with-extraction-functions) for details. | No |

Note that it is often more optimal to use a like filter instead of a regex for simple matching of prefixes.

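Since `pattern` is interpreted as a standard Java regular expression, a small standalone sketch of the linked `java.util.regex.Pattern` class (the dimension values are made up, and Druid's exact matching mode should be checked against the engine rather than inferred from this):

```java
import java.util.regex.Pattern;

public class RegexPatternSketch
{
  public static void main(String[] args)
  {
    // A pattern like the one a regex filter's "pattern" field would carry.
    Pattern pattern = Pattern.compile("^Wiki.*");

    System.out.println(pattern.matcher("Wikipedia").matches()); // true
    System.out.println(pattern.matcher("Druid").matches());     // false
  }
}
```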
@@ -81,7 +81,7 @@ The following built-in functions are available.
|name|description|
|----|-----------|
|concat|concat(expr, expr...) concatenate a list of strings|
|format|format(pattern[, args...]) returns a string formatted in the manner of Java's [String.format](https://docs.oracle.com/javase/8/docs/api/java/lang/String.html#format-java.lang.String-java.lang.Object...-).|
|format|format(pattern[, args...]) returns a string formatted in the manner of Java's [String.format](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object...)).|
|like|like(expr, pattern[, escape]) is equivalent to SQL `expr LIKE pattern`|
|lookup|lookup(expr, lookup-name[,replaceMissingValueWith]) looks up expr in a registered,`replaceMissingValueWith` is an optional constant string [query-time lookup](../querying/lookups.md)|
|parse_long|parse_long(string[, radix]) parses a string as a long with the given radix, or 10 (decimal) if a radix is not provided.|

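Because `format` follows Java's `String.format` semantics and `parse_long` behaves like `Long.parseLong` with an explicit radix, their behavior can be sanity-checked directly against those JDK methods (a rough analogy, not Druid's expression engine):

```java
public class ExpressionFunctionAnalogues
{
  public static void main(String[] args)
  {
    // format(pattern, args...) follows String.format's specifier rules.
    System.out.println(String.format("%s scanned %,d rows", "druid", 1234567));

    // parse_long(string, radix) mirrors Long.parseLong with a radix; radix defaults to 10.
    System.out.println(Long.parseLong("ff", 16)); // 255
    System.out.println(Long.parseLong("42"));     // 42
  }
}
```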
@@ -115,7 +115,7 @@ String functions accept strings and return a type appropriate to the function.
|`REPLACE(expr, substring, replacement)`|Replaces instances of `substring` in `expr` with `replacement` and returns the result.|
|`REPEAT(expr, N)`|Repeats `expr` `N` times.|
|`REVERSE(expr)`|Reverses `expr`.|
|`STRING_FORMAT(pattern[, args...])`|Returns a string formatted in the manner of Java's [String.format](https://docs.oracle.com/javase/8/docs/api/java/lang/String.html#format-java.lang.String-java.lang.Object...-).|
|`STRING_FORMAT(pattern[, args...])`|Returns a string formatted in the manner of Java's [String.format](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object...)).|
|`STRPOS(expr, substring)`|Returns the index of `substring` within `expr`, with indexes starting from 1. If `substring` is not found, returns 0.|
|`SUBSTRING(expr, index[, length])`|Returns a substring of `expr` starting at a given one-based index. If `length` is omitted, extracts characters to the end of the string, otherwise returns a substring of `length` UTF-16 characters.|
|`SUBSTR(expr, index[, length])`|Alias for `SUBSTRING`.|

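One detail worth calling out from the table is that `STRPOS` is one-based and returns 0 on a miss, unlike Java's zero-based `indexOf`; a tiny sketch of the correspondence (illustrative only):

```java
public class StrposSketch
{
  public static void main(String[] args)
  {
    String expr = "apache druid";

    // STRPOS('apache druid', 'druid') = 8: one-based index of the substring.
    System.out.println(expr.indexOf("druid") + 1); // 8

    // A missing substring yields 0 (indexOf returns -1, so -1 + 1 = 0).
    System.out.println(expr.indexOf("kafka") + 1); // 0
  }
}
```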
@@ -32,7 +32,7 @@ sidebar_label: "TopN"

Apache Druid TopN queries return a sorted set of results for the values in a given dimension according to some criteria. Conceptually, they can be thought of as an approximate [GroupByQuery](../querying/groupbyquery.md) over a single dimension with an [Ordering](../querying/limitspec.md) spec. TopNs are much faster and resource efficient than GroupBys for this use case. These types of queries take a topN query object and return an array of JSON objects where each object represents a value asked for by the topN query.
|
||||
|
||||
TopNs are approximate in that each data process will rank their top K results and only return those top K results to the Broker. K, by default in Druid, is `max(1000, threshold)`. In practice, this means that if you ask for the top 1000 items ordered, the correctness of the first ~900 items will be 100%, and the ordering of the results after that is not guaranteed. TopNs can be made more accurate by increasing the threshold.
|
||||
TopNs are approximate in that each data process will rank their top K results and only return those top K results to the Broker. K, by default in Druid, is `max(1000, threshold)`.
|
||||
|
||||
A topN query object looks like:
|
||||
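As a rough sketch, a minimal topN query object has this shape; the datasource, dimension, and interval values below are placeholders:

```json
{
  "queryType": "topN",
  "dataSource": "sample_data",
  "dimension": "page",
  "metric": "count",
  "threshold": 5,
  "granularity": "all",
  "intervals": ["2013-08-31T00:00:00.000/2013-09-03T00:00:00.000"],
  "aggregations": [
    { "type": "count", "name": "count" }
  ]
}
```

Raising `threshold` increases the per-process K described above and therefore improves accuracy, at the cost of more work on each data process.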
|
||||
|
|
|
@ -133,7 +133,7 @@ The [basic cluster tuning guide](../operations/basic-cluster-tuning.md) has info
|
|||
|
||||
We recommend running your favorite Linux distribution. You will also need
|
||||
|
||||
* [Java 8u92+, 11, or 17](../operations/java.md)
|
||||
* [Java 11 or 17](../operations/java.md)
|
||||
* Python 2 or Python 3
|
||||
|
||||
:::info
|
||||
|
@ -141,8 +141,8 @@ We recommend running your favorite Linux distribution. You will also need
|
|||
`DRUID_JAVA_HOME` or `JAVA_HOME`. For more details run the `bin/verify-java` script.
|
||||
:::
|
||||
|
||||
For information about installing Java, see the documentation for your OS package manager. If your Ubuntu-based OS does not have a recent enough version of Java, WebUpd8 offers [packages for those
|
||||
OSes](http://www.webupd8.org/2012/09/install-oracle-java-8-in-ubuntu-via-ppa.html).
|
||||
For information about installing Java, see the documentation for your OS package manager. If your Ubuntu-based OS does not have a recent enough version of Java, Linux Uprising offers [packages for those
|
||||
OSes](https://launchpad.net/~linuxuprising/+archive/ubuntu/java).
|
||||
|
||||
## Download the distribution
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ You can follow these steps on a relatively modest machine, such as a workstation
|
|||
The software requirements for the installation machine are:
|
||||
|
||||
* Linux, Mac OS X, or other Unix-like OS. (Windows is not supported)
|
||||
* [Java 8u92+, 11, or 17](../operations/java.md)
|
||||
* [Java 11 or 17](../operations/java.md)
|
||||
* Python 3 (preferred) or Python 2
|
||||
* Perl 5
|
||||
|
||||
|
|
|
@ -28,14 +28,14 @@ sub fail_check {
|
|||
: "No Java runtime was detected on your system.";
|
||||
|
||||
print STDERR <<"EOT";
|
||||
Druid requires Java 8, 11, or 17. $current_version_text
|
||||
Druid requires Java 11 or 17. $current_version_text
|
||||
|
||||
If you believe this check is in error, or you want to proceed with a potentially
|
||||
unsupported Java runtime, you can skip this check using an environment variable:
|
||||
|
||||
export DRUID_SKIP_JAVA_CHECK=1
|
||||
|
||||
Otherwise, install Java 8, 11, or 17 in one of the following locations.
|
||||
Otherwise, install Java 11 or 17 in one of the following locations.
|
||||
|
||||
* DRUID_JAVA_HOME
|
||||
* JAVA_HOME
|
||||
|
@ -68,6 +68,6 @@ if ($?) {
|
|||
}
|
||||
|
||||
# If we know it won't work, die. Otherwise hope for the best.
|
||||
if ($java_version =~ /version \"((\d+)\.(\d+).*?)\"/ && !($2 == 1 && $3 == 8) && $2 != 11 && $2 != 17 ) {
|
||||
if ($java_version =~ /version \"((\d+)\.(\d+).*?)\"/ && $2 != 11 && $2 != 17) {
|
||||
fail_check($1);
|
||||
}
|
||||
|
|
|
@ -52,11 +52,11 @@ RUN rpm --import http://repos.azulsystems.com/RPM-GPG-KEY-azulsystems && \
|
|||
rpm -ivh zulu-repo-${ZULU_REPO_VER}.noarch.rpm && \
|
||||
yum -q -y update && \
|
||||
yum -q -y upgrade && \
|
||||
yum -q -y install zulu8-jdk && \
|
||||
yum -q -y install zulu17-jdk && \
|
||||
yum clean all && \
|
||||
rm -rf /var/cache/yum zulu-repo_${ZULU_REPO_VER}.noarch.rpm
|
||||
|
||||
ENV JAVA_HOME=/usr/lib/jvm/zulu8
|
||||
ENV JAVA_HOME=/usr/lib/jvm/zulu17
|
||||
ENV PATH $PATH:$JAVA_HOME/bin
|
||||
|
||||
# hadoop
|
||||
|
@ -73,7 +73,7 @@ ENV HADOOP_CONF_DIR /usr/local/hadoop/etc/hadoop
|
|||
ENV YARN_CONF_DIR $HADOOP_HOME/etc/hadoop
|
||||
|
||||
# in hadoop 3 the example file is nearly empty so we can just append stuff
|
||||
RUN sed -i '$ a export JAVA_HOME=/usr/lib/jvm/zulu8' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
|
||||
RUN sed -i '$ a export JAVA_HOME=/usr/lib/jvm/zulu17' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
|
||||
RUN sed -i '$ a export HADOOP_HOME=/usr/local/hadoop' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
|
||||
RUN sed -i '$ a export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
|
||||
RUN sed -i '$ a export HDFS_NAMENODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
|
||||
|
|
|
@ -48,7 +48,6 @@
|
|||
<dependency>
|
||||
<groupId>com.google.inject</groupId>
|
||||
<artifactId>guice</artifactId>
|
||||
<version>${guice.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>aopalliance</groupId>
|
||||
|
@ -60,7 +59,7 @@
|
|||
<dependency>
|
||||
<groupId>com.google.inject.extensions</groupId>
|
||||
<artifactId>guice-multibindings</artifactId>
|
||||
<version>${guice.version}</version>
|
||||
<scope>provided</scope>
|
||||
<!--$NO-MVN-MAN-VER$ -->
|
||||
</dependency>
|
||||
<dependency>
|
||||
|
|
|
@ -145,7 +145,6 @@
|
|||
<dependency>
|
||||
<groupId>com.google.inject</groupId>
|
||||
<artifactId>guice</artifactId>
|
||||
<version>4.1.0</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
|
|
|
@ -262,6 +262,12 @@
|
|||
<artifactId>reflections</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.annotation</groupId>
|
||||
<artifactId>javax.annotation-api</artifactId>
|
||||
<version>1.2</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<extensions>
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
package org.apache.druid.segment;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.apache.druid.collections.BlockingPool;
|
||||
import org.apache.druid.collections.DefaultBlockingPool;
|
||||
import org.apache.druid.collections.StupidPool;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
|
@ -44,6 +45,7 @@ import org.apache.druid.query.groupby.GroupByQueryConfig;
|
|||
import org.apache.druid.query.groupby.GroupByQueryQueryToolChest;
|
||||
import org.apache.druid.query.groupby.GroupByQueryRunnerFactory;
|
||||
import org.apache.druid.query.groupby.GroupByResourcesReservationPool;
|
||||
import org.apache.druid.query.groupby.GroupByStatsProvider;
|
||||
import org.apache.druid.query.groupby.GroupingEngine;
|
||||
import org.apache.druid.query.groupby.ResultRow;
|
||||
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
|
||||
|
@ -70,8 +72,14 @@ public class MapVirtualColumnGroupByTest extends InitializedNullHandlingTest
|
|||
{
|
||||
final IncrementalIndex incrementalIndex = MapVirtualColumnTestBase.generateIndex();
|
||||
final GroupByQueryConfig config = new GroupByQueryConfig();
|
||||
|
||||
final BlockingPool<ByteBuffer> mergePool =
|
||||
new DefaultBlockingPool<>(() -> ByteBuffer.allocate(1024), 1);
|
||||
final GroupByStatsProvider groupByStatsProvider = new GroupByStatsProvider();
|
||||
|
||||
final GroupByResourcesReservationPool groupByResourcesReservationPool =
|
||||
new GroupByResourcesReservationPool(new DefaultBlockingPool<>(() -> ByteBuffer.allocate(1024), 1), config);
|
||||
new GroupByResourcesReservationPool(mergePool, config);
|
||||
|
||||
final GroupingEngine groupingEngine = new GroupingEngine(
|
||||
new DruidProcessingConfig()
|
||||
{
|
||||
|
@ -103,7 +111,8 @@ public class MapVirtualColumnGroupByTest extends InitializedNullHandlingTest
|
|||
groupByResourcesReservationPool,
|
||||
TestHelper.makeJsonMapper(),
|
||||
new DefaultObjectMapper(),
|
||||
QueryRunnerTestHelper.NOOP_QUERYWATCHER
|
||||
QueryRunnerTestHelper.NOOP_QUERYWATCHER,
|
||||
groupByStatsProvider
|
||||
);
|
||||
|
||||
final GroupByQueryRunnerFactory factory = new GroupByQueryRunnerFactory(
|
||||
|
|
|
@ -173,13 +173,6 @@
|
|||
<artifactId>equalsverifier</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-${mockito.inline.artifact}</artifactId>
|
||||
<version>${mockito.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- explicitly declare mockito-core dependency to make analyze-dependencies happy when running with Java 8 -->
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-core</artifactId>
|
||||
|
|
|
@ -63,6 +63,7 @@ import org.apache.druid.query.aggregation.datasketches.tuple.ArrayOfDoublesSketc
|
|||
import org.apache.druid.query.groupby.GroupByQuery;
|
||||
import org.apache.druid.query.groupby.GroupByQueryConfig;
|
||||
import org.apache.druid.query.groupby.GroupByResourcesReservationPool;
|
||||
import org.apache.druid.query.groupby.GroupByStatsProvider;
|
||||
import org.apache.druid.query.groupby.GroupingEngine;
|
||||
import org.apache.druid.query.groupby.ResultRow;
|
||||
import org.apache.druid.segment.column.ColumnType;
|
||||
|
@ -272,7 +273,8 @@ public class DatasketchesProjectionTest extends InitializedNullHandlingTest
|
|||
TestHelper.makeJsonMapper(),
|
||||
TestHelper.makeSmileMapper(),
|
||||
(query, future) -> {
|
||||
}
|
||||
},
|
||||
new GroupByStatsProvider()
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -145,18 +145,12 @@
|
|||
<artifactId>easymock</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- explicitly declare mockito-core dependency to make analyze-dependencies happy when running with Java 8 -->
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-core</artifactId>
|
||||
<version>${mockito.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-${mockito.inline.artifact}</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.mysql</groupId>
|
||||
<artifactId>mysql-connector-j</artifactId>
|
||||
|
|
|
@ -348,7 +348,10 @@ public class MSQCompactionRunner implements CompactionRunner
|
|||
private static RowSignature getRowSignature(DataSchema dataSchema)
|
||||
{
|
||||
RowSignature.Builder rowSignatureBuilder = RowSignature.builder();
|
||||
rowSignatureBuilder.add(dataSchema.getTimestampSpec().getTimestampColumn(), ColumnType.LONG);
|
||||
if (dataSchema.getDimensionsSpec().isForceSegmentSortByTime()) {
|
||||
// If sort not forced by time, __time appears as part of dimensions in DimensionsSpec
|
||||
rowSignatureBuilder.add(dataSchema.getTimestampSpec().getTimestampColumn(), ColumnType.LONG);
|
||||
}
|
||||
if (!isQueryGranularityEmptyOrNone(dataSchema)) {
|
||||
// A virtual column for query granularity would have been added. Add corresponding column type.
|
||||
rowSignatureBuilder.add(TIME_VIRTUAL_COLUMN, ColumnType.LONG);
|
||||
|
@ -398,25 +401,31 @@ public class MSQCompactionRunner implements CompactionRunner
|
|||
|
||||
private static ColumnMappings getColumnMappings(DataSchema dataSchema)
|
||||
{
|
||||
List<ColumnMapping> columnMappings = dataSchema.getDimensionsSpec()
|
||||
.getDimensions()
|
||||
.stream()
|
||||
.map(dim -> new ColumnMapping(
|
||||
dim.getName(), dim.getName()))
|
||||
.collect(Collectors.toList());
|
||||
List<ColumnMapping> columnMappings = new ArrayList<>();
|
||||
// For scan queries, a virtual column is created from __time if a custom query granularity is provided. For
|
||||
// group-by queries, as insert needs __time, it will always be one of the dimensions. Since dimensions in groupby
|
||||
// aren't allowed to have time column as the output name, we map time dimension to TIME_VIRTUAL_COLUMN in
|
||||
// dimensions, and map it back to the time column here.
|
||||
String timeColumn = (isGroupBy(dataSchema) || !isQueryGranularityEmptyOrNone(dataSchema))
|
||||
? TIME_VIRTUAL_COLUMN
|
||||
: ColumnHolder.TIME_COLUMN_NAME;
|
||||
ColumnMapping timeColumnMapping = new ColumnMapping(timeColumn, ColumnHolder.TIME_COLUMN_NAME);
|
||||
if (dataSchema.getDimensionsSpec().isForceSegmentSortByTime()) {
|
||||
// When sort is forced by time, __time is not listed in dimensionsSpec, so add its mapping explicitly here
|
||||
columnMappings.add(timeColumnMapping);
|
||||
}
|
||||
columnMappings.addAll(
|
||||
dataSchema.getDimensionsSpec()
|
||||
.getDimensions()
|
||||
.stream()
|
||||
.map(dim -> dim.getName().equals(ColumnHolder.TIME_COLUMN_NAME)
|
||||
? timeColumnMapping
|
||||
: new ColumnMapping(dim.getName(), dim.getName()))
|
||||
.collect(Collectors.toList())
|
||||
);
|
||||
columnMappings.addAll(Arrays.stream(dataSchema.getAggregators())
|
||||
.map(agg -> new ColumnMapping(agg.getName(), agg.getName()))
|
||||
.collect(
|
||||
Collectors.toList()));
|
||||
if (isGroupBy(dataSchema) || !isQueryGranularityEmptyOrNone(dataSchema)) {
|
||||
// For scan queries, a virtual column is created from __time if a custom query granularity is provided. For
|
||||
// group-by queries, as insert needs __time, it will always be one of the dimensions. Since dimensions in groupby
|
||||
// aren't allowed to have time column as the output name, we map time dimension to TIME_VIRTUAL_COLUMN in
|
||||
// dimensions, and map it back to the time column here.
|
||||
columnMappings.add(new ColumnMapping(TIME_VIRTUAL_COLUMN, ColumnHolder.TIME_COLUMN_NAME));
|
||||
} else {
|
||||
columnMappings.add(new ColumnMapping(ColumnHolder.TIME_COLUMN_NAME, ColumnHolder.TIME_COLUMN_NAME));
|
||||
}
|
||||
.collect(Collectors.toList()));
|
||||
return new ColumnMappings(columnMappings);
|
||||
}
|
||||
|
||||
|
@ -431,6 +440,19 @@ public class MSQCompactionRunner implements CompactionRunner
|
|||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
private static Map<String, Object> buildQueryContext(
|
||||
Map<String, Object> taskContext,
|
||||
DataSchema dataSchema
|
||||
)
|
||||
{
|
||||
if (dataSchema.getDimensionsSpec().isForceSegmentSortByTime()) {
|
||||
return taskContext;
|
||||
}
|
||||
Map<String, Object> queryContext = new HashMap<>(taskContext);
|
||||
queryContext.put(MultiStageQueryContext.CTX_FORCE_TIME_SORT, false);
|
||||
return queryContext;
|
||||
}
|
||||
|
||||
private static Query<?> buildScanQuery(
|
||||
CompactionTask compactionTask,
|
||||
Interval interval,
|
||||
|
@ -447,7 +469,7 @@ public class MSQCompactionRunner implements CompactionRunner
|
|||
.columnTypes(rowSignature.getColumnTypes())
|
||||
.intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(interval)))
|
||||
.filters(dataSchema.getTransformSpec().getFilter())
|
||||
.context(compactionTask.getContext());
|
||||
.context(buildQueryContext(compactionTask.getContext(), dataSchema));
|
||||
|
||||
if (compactionTask.getTuningConfig() != null && compactionTask.getTuningConfig().getPartitionsSpec() != null) {
|
||||
List<OrderByColumnSpec> orderByColumnSpecs = getOrderBySpec(compactionTask.getTuningConfig().getPartitionsSpec());
|
||||
|
@ -599,7 +621,7 @@ public class MSQCompactionRunner implements CompactionRunner
|
|||
.setDimensions(getAggregateDimensions(dataSchema, inputColToVirtualCol))
|
||||
.setAggregatorSpecs(Arrays.asList(dataSchema.getAggregators()))
|
||||
.setPostAggregatorSpecs(postAggregators)
|
||||
.setContext(compactionTask.getContext())
|
||||
.setContext(buildQueryContext(compactionTask.getContext(), dataSchema))
|
||||
.setInterval(interval);
|
||||
|
||||
if (compactionTask.getTuningConfig() != null && compactionTask.getTuningConfig().getPartitionsSpec() != null) {
|
||||
|
|
|
@ -60,7 +60,9 @@ import org.apache.druid.query.filter.DimFilter;
|
|||
import org.apache.druid.query.filter.SelectorDimFilter;
|
||||
import org.apache.druid.query.groupby.GroupByQuery;
|
||||
import org.apache.druid.query.scan.ScanQuery;
|
||||
import org.apache.druid.segment.AutoTypeColumnSchema;
|
||||
import org.apache.druid.segment.IndexSpec;
|
||||
import org.apache.druid.segment.NestedDataColumnSchema;
|
||||
import org.apache.druid.segment.column.ColumnHolder;
|
||||
import org.apache.druid.segment.column.ColumnType;
|
||||
import org.apache.druid.segment.data.CompressionFactory;
|
||||
|
@ -72,14 +74,13 @@ import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec;
|
|||
import org.apache.druid.segment.transform.TransformSpec;
|
||||
import org.apache.druid.server.coordinator.CompactionConfigValidationResult;
|
||||
import org.apache.druid.sql.calcite.parser.DruidSqlInsert;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.hamcrest.Matchers;
|
||||
import org.joda.time.Interval;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
@ -103,10 +104,14 @@ public class MSQCompactionRunnerTest
|
|||
private static final StringDimensionSchema STRING_DIMENSION = new StringDimensionSchema("string_dim", null, false);
|
||||
private static final StringDimensionSchema MV_STRING_DIMENSION = new StringDimensionSchema("mv_string_dim", null, null);
|
||||
private static final LongDimensionSchema LONG_DIMENSION = new LongDimensionSchema("long_dim");
|
||||
private static final NestedDataColumnSchema NESTED_DIMENSION = new NestedDataColumnSchema("nested_dim", 4);
|
||||
private static final AutoTypeColumnSchema AUTO_DIMENSION = new AutoTypeColumnSchema("auto_dim", null);
|
||||
private static final List<DimensionSchema> DIMENSIONS = ImmutableList.of(
|
||||
STRING_DIMENSION,
|
||||
LONG_DIMENSION,
|
||||
MV_STRING_DIMENSION
|
||||
MV_STRING_DIMENSION,
|
||||
NESTED_DIMENSION,
|
||||
AUTO_DIMENSION
|
||||
);
|
||||
private static final Map<Interval, DataSchema> INTERVAL_DATASCHEMAS = ImmutableMap.of(
|
||||
COMPACTION_INTERVAL,
|
||||
|
@ -144,7 +149,6 @@ public class MSQCompactionRunnerTest
|
|||
null,
|
||||
Collections.emptyMap(),
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
CompactionConfigValidationResult validationResult = MSQ_COMPACTION_RUNNER.validateCompactionTask(
|
||||
|
@ -166,7 +170,6 @@ public class MSQCompactionRunnerTest
|
|||
null,
|
||||
Collections.emptyMap(),
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
Assert.assertFalse(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
|
||||
|
@ -180,7 +183,6 @@ public class MSQCompactionRunnerTest
|
|||
null,
|
||||
Collections.emptyMap(),
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
Assert.assertTrue(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
|
||||
|
@ -195,7 +197,6 @@ public class MSQCompactionRunnerTest
|
|||
null,
|
||||
Collections.emptyMap(),
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
|
@ -218,7 +219,6 @@ public class MSQCompactionRunnerTest
|
|||
null,
|
||||
Collections.emptyMap(),
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
|
@ -240,7 +240,6 @@ public class MSQCompactionRunnerTest
|
|||
null,
|
||||
Collections.emptyMap(),
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
Assert.assertFalse(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
|
||||
|
@ -254,7 +253,6 @@ public class MSQCompactionRunnerTest
|
|||
null,
|
||||
Collections.emptyMap(),
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
Assert.assertTrue(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
|
||||
|
@ -268,7 +266,6 @@ public class MSQCompactionRunnerTest
|
|||
null,
|
||||
Collections.emptyMap(),
|
||||
new ClientCompactionTaskGranularitySpec(null, Granularities.ALL, null),
|
||||
null,
|
||||
null
|
||||
);
|
||||
Assert.assertTrue(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
|
||||
|
@ -282,7 +279,6 @@ public class MSQCompactionRunnerTest
|
|||
null,
|
||||
Collections.emptyMap(),
|
||||
new ClientCompactionTaskGranularitySpec(null, null, false),
|
||||
null,
|
||||
AGGREGATORS.toArray(new AggregatorFactory[0])
|
||||
);
|
||||
Assert.assertFalse(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
|
||||
|
@ -296,7 +292,6 @@ public class MSQCompactionRunnerTest
|
|||
null,
|
||||
Collections.emptyMap(),
|
||||
new ClientCompactionTaskGranularitySpec(null, null, true),
|
||||
null,
|
||||
null
|
||||
);
|
||||
Assert.assertFalse(MSQ_COMPACTION_RUNNER.validateCompactionTask(compactionTask, INTERVAL_DATASCHEMAS).isValid());
|
||||
|
@ -313,7 +308,6 @@ public class MSQCompactionRunnerTest
|
|||
null,
|
||||
Collections.emptyMap(),
|
||||
new ClientCompactionTaskGranularitySpec(null, null, true),
|
||||
null,
|
||||
new AggregatorFactory[]{new LongSumAggregatorFactory(outputColName, inputColName)}
|
||||
);
|
||||
CompactionConfigValidationResult validationResult = MSQ_COMPACTION_RUNNER.validateCompactionTask(
|
||||
|
@ -330,13 +324,13 @@ public class MSQCompactionRunnerTest
|
|||
@Test
|
||||
public void testRunCompactionTasksWithEmptyTaskListFails() throws Exception
|
||||
{
|
||||
CompactionTask compactionTask = createCompactionTask(null, null, Collections.emptyMap(), null, null, null);
|
||||
CompactionTask compactionTask = createCompactionTask(null, null, Collections.emptyMap(), null, null);
|
||||
TaskStatus taskStatus = MSQ_COMPACTION_RUNNER.runCompactionTasks(compactionTask, Collections.emptyMap(), null);
|
||||
Assert.assertTrue(taskStatus.isFailure());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMSQControllerTaskSpecWithScanIsValid() throws JsonProcessingException
|
||||
public void testCompactionConfigWithoutMetricsSpecProducesCorrectSpec() throws JsonProcessingException
|
||||
{
|
||||
DimFilter dimFilter = new SelectorDimFilter("dim1", "foo", null);
|
||||
|
||||
|
@ -345,7 +339,6 @@ public class MSQCompactionRunnerTest
|
|||
dimFilter,
|
||||
Collections.emptyMap(),
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
|
@ -357,7 +350,7 @@ public class MSQCompactionRunnerTest
|
|||
.withGranularity(
|
||||
new UniformGranularitySpec(
|
||||
SEGMENT_GRANULARITY.getDefaultGranularity(),
|
||||
null,
|
||||
QUERY_GRANULARITY.getDefaultGranularity(),
|
||||
false,
|
||||
Collections.singletonList(COMPACTION_INTERVAL)
|
||||
)
|
||||
|
@ -375,37 +368,37 @@ public class MSQCompactionRunnerTest
|
|||
|
||||
MSQSpec actualMSQSpec = msqControllerTask.getQuerySpec();
|
||||
|
||||
Assert.assertEquals(
|
||||
new MSQTuningConfig(
|
||||
1,
|
||||
MultiStageQueryContext.DEFAULT_ROWS_IN_MEMORY,
|
||||
MAX_ROWS_PER_SEGMENT,
|
||||
null,
|
||||
createIndexSpec()
|
||||
),
|
||||
actualMSQSpec.getTuningConfig()
|
||||
);
|
||||
Assert.assertEquals(
|
||||
new DataSourceMSQDestination(
|
||||
DATA_SOURCE,
|
||||
SEGMENT_GRANULARITY.getDefaultGranularity(),
|
||||
null,
|
||||
Collections.singletonList(COMPACTION_INTERVAL),
|
||||
DIMENSIONS.stream().collect(Collectors.toMap(DimensionSchema::getName, Function.identity())),
|
||||
null
|
||||
),
|
||||
actualMSQSpec.getDestination()
|
||||
);
|
||||
Assert.assertEquals(getExpectedTuningConfig(), actualMSQSpec.getTuningConfig());
|
||||
Assert.assertEquals(getExpectedDestination(), actualMSQSpec.getDestination());
|
||||
|
||||
Assert.assertTrue(actualMSQSpec.getQuery() instanceof ScanQuery);
|
||||
ScanQuery scanQuery = (ScanQuery) actualMSQSpec.getQuery();
|
||||
|
||||
List<String> expectedColumns = new ArrayList<>();
|
||||
List<ColumnType> expectedColumnTypes = new ArrayList<>();
|
||||
// Add __time since this is a time-ordered query which doesn't have __time explicitly defined in dimensionsSpec
|
||||
expectedColumns.add(ColumnHolder.TIME_COLUMN_NAME);
|
||||
expectedColumnTypes.add(ColumnType.LONG);
|
||||
|
||||
// Add TIME_VIRTUAL_COLUMN since a query granularity is specified
|
||||
expectedColumns.add(MSQCompactionRunner.TIME_VIRTUAL_COLUMN);
|
||||
expectedColumnTypes.add(ColumnType.LONG);
|
||||
|
||||
expectedColumns.addAll(DIMENSIONS.stream().map(DimensionSchema::getName).collect(Collectors.toList()));
|
||||
expectedColumnTypes.addAll(DIMENSIONS.stream().map(DimensionSchema::getColumnType).collect(Collectors.toList()));
|
||||
|
||||
Assert.assertEquals(expectedColumns, scanQuery.getColumns());
|
||||
Assert.assertEquals(expectedColumnTypes, scanQuery.getColumnTypes());
|
||||
|
||||
Assert.assertEquals(dimFilter, scanQuery.getFilter());
|
||||
Assert.assertEquals(
|
||||
JSON_MAPPER.writeValueAsString(SEGMENT_GRANULARITY.toString()),
|
||||
msqControllerTask.getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY)
|
||||
);
|
||||
Assert.assertNull(msqControllerTask.getContext().get(DruidSqlInsert.SQL_INSERT_QUERY_GRANULARITY));
|
||||
Assert.assertEquals(
|
||||
JSON_MAPPER.writeValueAsString(QUERY_GRANULARITY.toString()),
|
||||
msqControllerTask.getContext().get(DruidSqlInsert.SQL_INSERT_QUERY_GRANULARITY)
|
||||
);
|
||||
Assert.assertEquals(WorkerAssignmentStrategy.MAX, actualMSQSpec.getAssignmentStrategy());
|
||||
Assert.assertEquals(
|
||||
PARTITION_DIMENSIONS.stream().map(OrderBy::ascending).collect(Collectors.toList()),
|
||||
|
@ -414,7 +407,60 @@ public class MSQCompactionRunnerTest
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testMSQControllerTaskSpecWithAggregatorsIsValid() throws JsonProcessingException
|
||||
public void testCompactionConfigWithSortOnNonTimeDimensionsProducesCorrectSpec() throws JsonProcessingException
|
||||
{
|
||||
List<DimensionSchema> nonTimeSortedDimensions = ImmutableList.of(
|
||||
STRING_DIMENSION,
|
||||
new LongDimensionSchema(ColumnHolder.TIME_COLUMN_NAME),
|
||||
LONG_DIMENSION
|
||||
);
|
||||
CompactionTask taskCreatedWithTransformSpec = createCompactionTask(
|
||||
new DynamicPartitionsSpec(TARGET_ROWS_PER_SEGMENT, null),
|
||||
null,
|
||||
Collections.emptyMap(),
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
// Set forceSegmentSortByTime=false to enable non-time order
|
||||
DimensionsSpec dimensionsSpec = DimensionsSpec.builder()
|
||||
.setDimensions(nonTimeSortedDimensions)
|
||||
.setForceSegmentSortByTime(false)
|
||||
.build();
|
||||
DataSchema dataSchema =
|
||||
DataSchema.builder()
|
||||
.withDataSource(DATA_SOURCE)
|
||||
.withTimestamp(new TimestampSpec(TIMESTAMP_COLUMN, null, null))
|
||||
.withDimensions(dimensionsSpec)
|
||||
.withGranularity(
|
||||
new UniformGranularitySpec(
|
||||
SEGMENT_GRANULARITY.getDefaultGranularity(),
|
||||
null,
|
||||
false,
|
||||
Collections.singletonList(COMPACTION_INTERVAL)
|
||||
)
|
||||
)
|
||||
.build();
|
||||
|
||||
List<MSQControllerTask> msqControllerTasks = MSQ_COMPACTION_RUNNER.createMsqControllerTasks(
|
||||
taskCreatedWithTransformSpec,
|
||||
Collections.singletonMap(COMPACTION_INTERVAL, dataSchema)
|
||||
);
|
||||
|
||||
MSQSpec actualMSQSpec = Iterables.getOnlyElement(msqControllerTasks).getQuerySpec();
|
||||
|
||||
Assert.assertTrue(actualMSQSpec.getQuery() instanceof ScanQuery);
|
||||
ScanQuery scanQuery = (ScanQuery) actualMSQSpec.getQuery();
|
||||
|
||||
// Dimensions should already list __time and the order should remain intact
|
||||
Assert.assertEquals(
|
||||
nonTimeSortedDimensions.stream().map(DimensionSchema::getName).collect(Collectors.toList()),
|
||||
scanQuery.getColumns()
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompactionConfigWithMetricsSpecProducesCorrectSpec() throws JsonProcessingException
|
||||
{
|
||||
DimFilter dimFilter = new SelectorDimFilter("dim1", "foo", null);
|
||||
|
||||
|
@ -423,7 +469,6 @@ public class MSQCompactionRunnerTest
|
|||
dimFilter,
|
||||
Collections.emptyMap(),
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
|
@ -444,7 +489,6 @@ public class MSQCompactionRunnerTest
|
|||
multiValuedDimensions
|
||||
);
|
||||
|
||||
|
||||
List<MSQControllerTask> msqControllerTasks = MSQ_COMPACTION_RUNNER.createMsqControllerTasks(
|
||||
taskCreatedWithTransformSpec,
|
||||
Collections.singletonMap(COMPACTION_INTERVAL, dataSchema)
|
||||
|
@ -454,27 +498,8 @@ public class MSQCompactionRunnerTest
|
|||
|
||||
MSQSpec actualMSQSpec = msqControllerTask.getQuerySpec();
|
||||
|
||||
Assert.assertEquals(
|
||||
new MSQTuningConfig(
|
||||
1,
|
||||
MultiStageQueryContext.DEFAULT_ROWS_IN_MEMORY,
|
||||
MAX_ROWS_PER_SEGMENT,
|
||||
null,
|
||||
createIndexSpec()
|
||||
),
|
||||
actualMSQSpec.getTuningConfig()
|
||||
);
|
||||
Assert.assertEquals(
|
||||
new DataSourceMSQDestination(
|
||||
DATA_SOURCE,
|
||||
SEGMENT_GRANULARITY.getDefaultGranularity(),
|
||||
null,
|
||||
Collections.singletonList(COMPACTION_INTERVAL),
|
||||
DIMENSIONS.stream().collect(Collectors.toMap(DimensionSchema::getName, Function.identity())),
|
||||
null
|
||||
),
|
||||
actualMSQSpec.getDestination()
|
||||
);
|
||||
Assert.assertEquals(getExpectedTuningConfig(), actualMSQSpec.getTuningConfig());
|
||||
Assert.assertEquals(getExpectedDestination(), actualMSQSpec.getDestination());
|
||||
|
||||
Assert.assertTrue(actualMSQSpec.getQuery() instanceof GroupByQuery);
|
||||
GroupByQuery groupByQuery = (GroupByQuery) actualMSQSpec.getQuery();
|
||||
|
@ -490,30 +515,32 @@ public class MSQCompactionRunnerTest
|
|||
);
|
||||
Assert.assertEquals(WorkerAssignmentStrategy.MAX, actualMSQSpec.getAssignmentStrategy());
|
||||
|
||||
|
||||
// Since only MV_STRING_DIMENSION is indicated to be MVD by the CombinedSchema, conversion to array should happen
|
||||
// only for that column.
|
||||
List<DimensionSpec> expectedDimensionSpec = DIMENSIONS.stream()
|
||||
.filter(dim -> !MV_STRING_DIMENSION.getName()
|
||||
.equals(dim.getName()))
|
||||
.map(dim -> new DefaultDimensionSpec(
|
||||
dim.getName(),
|
||||
dim.getName(),
|
||||
dim.getColumnType()
|
||||
))
|
||||
.collect(
|
||||
Collectors.toList());
|
||||
List<DimensionSpec> expectedDimensionSpec = new ArrayList<>();
|
||||
expectedDimensionSpec.add(
|
||||
new DefaultDimensionSpec(MSQCompactionRunner.TIME_VIRTUAL_COLUMN,
|
||||
MSQCompactionRunner.TIME_VIRTUAL_COLUMN,
|
||||
ColumnType.LONG)
|
||||
new DefaultDimensionSpec(
|
||||
MSQCompactionRunner.TIME_VIRTUAL_COLUMN,
|
||||
MSQCompactionRunner.TIME_VIRTUAL_COLUMN,
|
||||
ColumnType.LONG
|
||||
)
|
||||
);
|
||||
String mvToArrayStringDim = MSQCompactionRunner.ARRAY_VIRTUAL_COLUMN_PREFIX + MV_STRING_DIMENSION.getName();
|
||||
expectedDimensionSpec.add(new DefaultDimensionSpec(mvToArrayStringDim, mvToArrayStringDim, ColumnType.STRING_ARRAY));
|
||||
MatcherAssert.assertThat(
|
||||
expectedDimensionSpec,
|
||||
Matchers.containsInAnyOrder(groupByQuery.getDimensions().toArray(new DimensionSpec[0]))
|
||||
);
|
||||
// Since only MV_STRING_DIMENSION is indicated to be MVD by the CombinedSchema, conversion to array should happen
|
||||
// only for that column.
|
||||
expectedDimensionSpec.addAll(DIMENSIONS.stream()
|
||||
.map(dim ->
|
||||
MV_STRING_DIMENSION.getName().equals(dim.getName())
|
||||
? new DefaultDimensionSpec(
|
||||
mvToArrayStringDim,
|
||||
mvToArrayStringDim,
|
||||
ColumnType.STRING_ARRAY
|
||||
)
|
||||
: new DefaultDimensionSpec(
|
||||
dim.getName(),
|
||||
dim.getName(),
|
||||
dim.getColumnType()
|
||||
))
|
||||
.collect(Collectors.toList()));
|
||||
Assert.assertEquals(expectedDimensionSpec, groupByQuery.getDimensions());
|
||||
}
|
||||
|
||||
private CompactionTask createCompactionTask(
|
||||
|
@ -521,7 +548,6 @@ public class MSQCompactionRunnerTest
|
|||
@Nullable DimFilter dimFilter,
|
||||
Map<String, Object> contextParams,
|
||||
@Nullable ClientCompactionTaskGranularitySpec granularitySpec,
|
||||
@Nullable List<DimensionSchema> dimensionSchemas,
|
||||
@Nullable AggregatorFactory[] metricsSpec
|
||||
)
|
||||
{
|
||||
|
@ -545,7 +571,7 @@ public class MSQCompactionRunnerTest
|
|||
))
|
||||
.transformSpec(transformSpec)
|
||||
.granularitySpec(granularitySpec)
|
||||
.dimensionsSpec(new DimensionsSpec(dimensionSchemas))
|
||||
.dimensionsSpec(new DimensionsSpec(null))
|
||||
.metricsSpec(metricsSpec)
|
||||
.compactionRunner(MSQ_COMPACTION_RUNNER)
|
||||
.context(context);
|
||||
|
@ -580,4 +606,27 @@ public class MSQCompactionRunnerTest
|
|||
.withLongEncoding(CompressionFactory.LongEncodingStrategy.LONGS)
|
||||
.build();
|
||||
}
|
||||
|
||||
private static DataSourceMSQDestination getExpectedDestination()
|
||||
{
|
||||
return new DataSourceMSQDestination(
|
||||
DATA_SOURCE,
|
||||
SEGMENT_GRANULARITY.getDefaultGranularity(),
|
||||
null,
|
||||
Collections.singletonList(COMPACTION_INTERVAL),
|
||||
DIMENSIONS.stream().collect(Collectors.toMap(DimensionSchema::getName, Function.identity())),
|
||||
null
|
||||
);
|
||||
}
|
||||
|
||||
private static MSQTuningConfig getExpectedTuningConfig()
|
||||
{
|
||||
return new MSQTuningConfig(
|
||||
1,
|
||||
MultiStageQueryContext.DEFAULT_ROWS_IN_MEMORY,
|
||||
MAX_ROWS_PER_SEGMENT,
|
||||
null,
|
||||
createIndexSpec()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -62,13 +62,12 @@ public class CalciteUnionQueryMSQTest extends CalciteUnionQueryTest
|
|||
*/
|
||||
@Test
|
||||
@Override
|
||||
public void testUnionIsUnplannable()
|
||||
public void testUnionDifferentColumnOrder()
|
||||
{
|
||||
assertQueryIsUnplannable(
|
||||
"SELECT dim2, dim1, m1 FROM foo2 UNION SELECT dim1, dim2, m1 FROM foo",
|
||||
"SQL requires union between two tables and column names queried for each table are different Left: [dim2, dim1, m1], Right: [dim1, dim2, m1]."
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
@Disabled("Ignored till MSQ can plan UNION ALL with any operand")
|
||||
|
|
|
@ -511,7 +511,6 @@ public class MSQTestBase extends BaseCalciteQueryTest
|
|||
binder -> binder.bind(SegmentManager.class).toInstance(EasyMock.createMock(SegmentManager.class)),
|
||||
new JoinableFactoryModule(),
|
||||
new IndexingServiceTuningConfigModule(),
|
||||
new MSQIndexingModule(),
|
||||
Modules.override(new MSQSqlModule()).with(
|
||||
binder -> {
|
||||
// Our Guice configuration currently requires bindings to exist even if they aren't ever used, the
|
||||
|
@ -540,6 +539,7 @@ public class MSQTestBase extends BaseCalciteQueryTest
|
|||
|
||||
objectMapper = setupObjectMapper(injector);
|
||||
objectMapper.registerModules(new StorageConnectorModule().getJacksonModules());
|
||||
objectMapper.registerModules(new MSQIndexingModule().getJacksonModules());
|
||||
objectMapper.registerModules(sqlModule.getJacksonModules());
|
||||
objectMapper.registerModules(BuiltInTypesModule.getJacksonModulesList());
|
||||
|
||||
|
@ -697,7 +697,6 @@ public class MSQTestBase extends BaseCalciteQueryTest
|
|||
break;
|
||||
default:
|
||||
throw new ISE("Cannot query segment %s in test runner", segmentId);
|
||||
|
||||
}
|
||||
Segment segment = new Segment()
|
||||
{
|
||||
|
|
|
@ -17,6 +17,6 @@
|
|||
~ under the License.
|
||||
-->
|
||||
|
||||
This module contains a simple implementation of [SslContext](http://docs.oracle.com/javase/8/docs/api/javax/net/ssl/SSLContext.html)
|
||||
This module contains a simple implementation of [SslContext](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/javax/net/ssl/SSLContext.html)
|
||||
that will be injected to be used with HttpClient that Druid nodes use internally to communicate with each other.
|
||||
More details [here](https://druid.apache.org/docs/latest/development/extensions-core/simple-client-sslcontext.html).
|
||||
|
|
|
@ -145,18 +145,12 @@
|
|||
<artifactId>equalsverifier</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- explicitly declare mockito-core dependency to make analyze-dependencies happy when running with Java 8 -->
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-core</artifactId>
|
||||
<version>${mockito.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-${mockito.inline.artifact}</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<profiles>
|
||||
|
|
|
@ -293,7 +293,7 @@ public class IndexGeneratorJob implements Jobby
|
|||
AggregatorFactory[] aggs,
|
||||
HadoopDruidIndexerConfig config,
|
||||
@Nullable Iterable<String> oldDimOrder,
|
||||
Map<String, ColumnFormat> oldCapabilities
|
||||
@Nullable Map<String, ColumnFormat> oldCapabilities
|
||||
)
|
||||
{
|
||||
final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();
|
||||
|
|
|
@ -91,6 +91,7 @@
|
|||
<dependency>
|
||||
<groupId>com.google.inject.extensions</groupId>
|
||||
<artifactId>guice-multibindings</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.ws.rs</groupId>
|
||||
|
|
|
@ -25,7 +25,6 @@ import org.apache.druid.indexing.common.LockGranularity;
|
|||
import org.apache.druid.indexing.common.task.IndexTaskUtils;
|
||||
import org.apache.druid.indexing.common.task.Task;
|
||||
import org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator;
|
||||
import org.apache.druid.indexing.overlord.Segments;
|
||||
import org.apache.druid.indexing.overlord.TaskLockbox;
|
||||
import org.apache.druid.indexing.overlord.config.TaskLockConfig;
|
||||
import org.apache.druid.java.util.common.ISE;
|
||||
|
@ -41,6 +40,7 @@ import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
|
|||
import org.apache.druid.query.DruidMetrics;
|
||||
import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;
|
||||
import org.apache.druid.timeline.DataSegment;
|
||||
import org.apache.druid.timeline.Partitions;
|
||||
import org.joda.time.Interval;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
@ -87,6 +87,8 @@ public class SegmentAllocationQueue
|
|||
private final ConcurrentHashMap<AllocateRequestKey, AllocateRequestBatch> keyToBatch = new ConcurrentHashMap<>();
|
||||
private final BlockingDeque<AllocateRequestBatch> processingQueue = new LinkedBlockingDeque<>(MAX_QUEUE_SIZE);
|
||||
|
||||
private final boolean reduceMetadataIO;
|
||||
|
||||
@Inject
|
||||
public SegmentAllocationQueue(
|
||||
TaskLockbox taskLockbox,
|
||||
|
@ -100,6 +102,7 @@ public class SegmentAllocationQueue
|
|||
this.taskLockbox = taskLockbox;
|
||||
this.metadataStorage = metadataStorage;
|
||||
this.maxWaitTimeMillis = taskLockConfig.getBatchAllocationWaitTime();
|
||||
this.reduceMetadataIO = taskLockConfig.isBatchAllocationReduceMetadataIO();
|
||||
|
||||
this.executor = taskLockConfig.isBatchSegmentAllocation()
|
||||
? executorFactory.create(1, "SegmentAllocQueue-%s") : null;
|
||||
|
@ -380,13 +383,11 @@ public class SegmentAllocationQueue
|
|||
|
||||
private Set<DataSegment> retrieveUsedSegments(AllocateRequestKey key)
|
||||
{
|
||||
return new HashSet<>(
|
||||
metadataStorage.retrieveUsedSegmentsForInterval(
|
||||
key.dataSource,
|
||||
key.preferredAllocationInterval,
|
||||
Segments.ONLY_VISIBLE
|
||||
)
|
||||
);
|
||||
return metadataStorage.getSegmentTimelineForAllocation(
|
||||
key.dataSource,
|
||||
key.preferredAllocationInterval,
|
||||
(key.lockGranularity == LockGranularity.TIME_CHUNK) && reduceMetadataIO
|
||||
).findNonOvershadowedObjectsInInterval(Intervals.ETERNITY, Partitions.ONLY_COMPLETE);
|
||||
}
|
||||
|
||||
private int allocateSegmentsForBatch(AllocateRequestBatch requestBatch, Set<DataSegment> usedSegments)
|
||||
|
@ -493,7 +494,8 @@ public class SegmentAllocationQueue
|
|||
requestKey.dataSource,
|
||||
tryInterval,
|
||||
requestKey.skipSegmentLineageCheck,
|
||||
requestKey.lockGranularity
|
||||
requestKey.lockGranularity,
|
||||
reduceMetadataIO
|
||||
);
|
||||
|
||||
int successfulRequests = 0;
|
||||
|
|
|
@ -1515,7 +1515,7 @@ public class RemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer
|
|||
boolean shouldRunPendingTasks = false;
|
||||
|
||||
// must be synchronized while iterating:
|
||||
// https://docs.oracle.com/javase/8/docs/api/java/util/Collections.html#synchronizedSet-java.util.Set-
|
||||
// https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Collections.html#synchronizedSet(java.util.Set)
|
||||
synchronized (blackListedWorkers) {
|
||||
for (Iterator<ZkWorker> iterator = blackListedWorkers.iterator(); iterator.hasNext(); ) {
|
||||
ZkWorker zkWorker = iterator.next();
|
||||
|
|
|
@ -466,6 +466,8 @@ public class TaskLockbox
|
|||
* @param skipSegmentLineageCheck Whether lineage check is to be skipped
|
||||
* (this is true for streaming ingestion)
|
||||
* @param lockGranularity Granularity of task lock
|
||||
* @param reduceMetadataIO Whether to skip fetching payloads for all used
|
||||
* segments and rely on their IDs instead.
|
||||
* @return List of allocation results in the same order as the requests.
|
||||
*/
|
||||
public List<SegmentAllocateResult> allocateSegments(
|
||||
|
@ -473,7 +475,8 @@ public class TaskLockbox
|
|||
String dataSource,
|
||||
Interval interval,
|
||||
boolean skipSegmentLineageCheck,
|
||||
LockGranularity lockGranularity
|
||||
LockGranularity lockGranularity,
|
||||
boolean reduceMetadataIO
|
||||
)
|
||||
{
|
||||
log.info("Allocating [%d] segments for datasource [%s], interval [%s]", requests.size(), dataSource, interval);
|
||||
|
@ -487,9 +490,15 @@ public class TaskLockbox
|
|||
if (isTimeChunkLock) {
|
||||
// For time-chunk locking, segment must be allocated only after acquiring the lock
|
||||
holderList.getPending().forEach(holder -> acquireTaskLock(holder, true));
|
||||
allocateSegmentIds(dataSource, interval, skipSegmentLineageCheck, holderList.getPending());
|
||||
allocateSegmentIds(
|
||||
dataSource,
|
||||
interval,
|
||||
skipSegmentLineageCheck,
|
||||
holderList.getPending(),
|
||||
reduceMetadataIO
|
||||
);
|
||||
} else {
|
||||
allocateSegmentIds(dataSource, interval, skipSegmentLineageCheck, holderList.getPending());
|
||||
allocateSegmentIds(dataSource, interval, skipSegmentLineageCheck, holderList.getPending(), false);
|
||||
holderList.getPending().forEach(holder -> acquireTaskLock(holder, false));
|
||||
}
|
||||
holderList.getPending().forEach(SegmentAllocationHolder::markSucceeded);
|
||||
|
@ -702,12 +711,12 @@ public class TaskLockbox
|
|||
* for the given requests. Updates the holder with the allocated segment if
|
||||
* the allocation succeeds, otherwise marks it as failed.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
void allocateSegmentIds(
|
||||
private void allocateSegmentIds(
|
||||
String dataSource,
|
||||
Interval interval,
|
||||
boolean skipSegmentLineageCheck,
|
||||
Collection<SegmentAllocationHolder> holders
|
||||
Collection<SegmentAllocationHolder> holders,
|
||||
boolean reduceMetadataIO
|
||||
)
|
||||
{
|
||||
if (holders.isEmpty()) {
|
||||
|
@ -724,7 +733,8 @@ public class TaskLockbox
|
|||
dataSource,
|
||||
interval,
|
||||
skipSegmentLineageCheck,
|
||||
createRequests
|
||||
createRequests,
|
||||
reduceMetadataIO
|
||||
);
|
||||
|
||||
for (SegmentAllocationHolder holder : holders) {
|
||||
|
|
|
@ -36,6 +36,9 @@ public class TaskLockConfig
|
|||
@JsonProperty
|
||||
private long batchAllocationWaitTime = 0L;
|
||||
|
||||
@JsonProperty
|
||||
private boolean batchAllocationReduceMetadataIO = true;
|
||||
|
||||
public boolean isForceTimeChunkLock()
|
||||
{
|
||||
return forceTimeChunkLock;
|
||||
|
@ -50,4 +53,10 @@ public class TaskLockConfig
|
|||
{
|
||||
return batchAllocationWaitTime;
|
||||
}
|
||||
|
||||
public boolean isBatchAllocationReduceMetadataIO()
|
||||
{
|
||||
return batchAllocationReduceMetadataIO;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -93,21 +93,28 @@ public class SegmentAllocateActionTest
|
|||
|
||||
private SegmentAllocationQueue allocationQueue;
|
||||
|
||||
@Parameterized.Parameters(name = "granularity = {0}, useBatch = {1}")
|
||||
@Parameterized.Parameters(name = "granularity = {0}, useBatch = {1}, skipSegmentPayloadFetchForAllocation = {2}")
|
||||
public static Iterable<Object[]> constructorFeeder()
|
||||
{
|
||||
return ImmutableList.of(
|
||||
new Object[]{LockGranularity.SEGMENT, true},
|
||||
new Object[]{LockGranularity.SEGMENT, false},
|
||||
new Object[]{LockGranularity.TIME_CHUNK, true},
|
||||
new Object[]{LockGranularity.TIME_CHUNK, false}
|
||||
new Object[]{LockGranularity.SEGMENT, true, true},
|
||||
new Object[]{LockGranularity.SEGMENT, true, false},
|
||||
new Object[]{LockGranularity.SEGMENT, false, false},
|
||||
new Object[]{LockGranularity.TIME_CHUNK, true, true},
|
||||
new Object[]{LockGranularity.TIME_CHUNK, true, false},
|
||||
new Object[]{LockGranularity.TIME_CHUNK, false, false}
|
||||
);
|
||||
}
|
||||
|
||||
public SegmentAllocateActionTest(LockGranularity lockGranularity, boolean useBatch)
|
||||
public SegmentAllocateActionTest(
|
||||
LockGranularity lockGranularity,
|
||||
boolean useBatch,
|
||||
boolean skipSegmentPayloadFetchForAllocation
|
||||
)
|
||||
{
|
||||
this.lockGranularity = lockGranularity;
|
||||
this.useBatch = useBatch;
|
||||
this.taskActionTestKit.setSkipSegmentPayloadFetchForAllocation(skipSegmentPayloadFetchForAllocation);
|
||||
}
|
||||
|
||||
@Before
|
||||
|
|
|
@ -36,6 +36,8 @@ import org.junit.Assert;
|
|||
import org.junit.Before;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
@ -44,6 +46,7 @@ import java.util.concurrent.Future;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
@RunWith(Parameterized.class)
|
||||
public class SegmentAllocationQueueTest
|
||||
{
|
||||
@Rule
|
||||
|
@ -54,6 +57,19 @@ public class SegmentAllocationQueueTest
|
|||
private StubServiceEmitter emitter;
|
||||
private BlockingExecutorService executor;
|
||||
|
||||
private final boolean reduceMetadataIO;
|
||||
|
||||
@Parameterized.Parameters(name = "reduceMetadataIO = {0}")
|
||||
public static Object[][] getTestParameters()
|
||||
{
|
||||
return new Object[][]{{true}, {false}};
|
||||
}
|
||||
|
||||
public SegmentAllocationQueueTest(boolean reduceMetadataIO)
|
||||
{
|
||||
this.reduceMetadataIO = reduceMetadataIO;
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setUp()
|
||||
{
|
||||
|
@ -73,6 +89,12 @@ public class SegmentAllocationQueueTest
|
|||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isBatchAllocationReduceMetadataIO()
|
||||
{
|
||||
return reduceMetadataIO;
|
||||
}
|
||||
};
|
||||
|
||||
allocationQueue = new SegmentAllocationQueue(
|
||||
|
|
|
@ -58,6 +58,8 @@ public class TaskActionTestKit extends ExternalResource
|
|||
private SegmentSchemaManager segmentSchemaManager;
|
||||
private SegmentSchemaCache segmentSchemaCache;
|
||||
|
||||
private boolean skipSegmentPayloadFetchForAllocation = new TaskLockConfig().isBatchAllocationReduceMetadataIO();
|
||||
|
||||
public TaskLockbox getTaskLockbox()
|
||||
{
|
||||
return taskLockbox;
|
||||
|
@ -78,6 +80,11 @@ public class TaskActionTestKit extends ExternalResource
|
|||
return taskActionToolbox;
|
||||
}
|
||||
|
||||
public void setSkipSegmentPayloadFetchForAllocation(boolean skipSegmentPayloadFetchForAllocation)
|
||||
{
|
||||
this.skipSegmentPayloadFetchForAllocation = skipSegmentPayloadFetchForAllocation;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void before()
|
||||
{
|
||||
|
@ -126,6 +133,12 @@ public class TaskActionTestKit extends ExternalResource
|
|||
{
|
||||
return 10L;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isBatchAllocationReduceMetadataIO()
|
||||
{
|
||||
return skipSegmentPayloadFetchForAllocation;
|
||||
}
|
||||
};
|
||||
|
||||
taskActionToolbox = new TaskActionToolbox(
|
||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.druid.metadata.ReplaceTaskLock;
|
|||
import org.apache.druid.segment.SegmentSchemaMapping;
|
||||
import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;
|
||||
import org.apache.druid.timeline.DataSegment;
|
||||
import org.apache.druid.timeline.SegmentTimeline;
|
||||
import org.apache.druid.timeline.partition.PartialShardSpec;
|
||||
import org.joda.time.DateTime;
|
||||
import org.joda.time.Interval;
|
||||
|
@ -168,7 +169,8 @@ public class TestIndexerMetadataStorageCoordinator implements IndexerMetadataSto
|
|||
String dataSource,
|
||||
Interval interval,
|
||||
boolean skipSegmentLineageCheck,
|
||||
List<SegmentCreateRequest> requests
|
||||
List<SegmentCreateRequest> requests,
|
||||
boolean isTimeChunk
|
||||
)
|
||||
{
|
||||
return Collections.emptyMap();
|
||||
|
@ -332,6 +334,20 @@ public class TestIndexerMetadataStorageCoordinator implements IndexerMetadataSto
|
|||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SegmentTimeline getSegmentTimelineForAllocation(
|
||||
String dataSource,
|
||||
Interval interval,
|
||||
boolean skipSegmentPayloadFetchForAllocation
|
||||
)
|
||||
{
|
||||
return SegmentTimeline.forSegments(retrieveUsedSegmentsForIntervals(
|
||||
dataSource,
|
||||
Collections.singletonList(interval),
|
||||
Segments.INCLUDING_OVERSHADOWED
|
||||
));
|
||||
}
|
||||
|
||||
public Set<DataSegment> getPublished()
|
||||
{
|
||||
return ImmutableSet.copyOf(published);
|
||||
|
|
|
@ -86,6 +86,7 @@
|
|||
<dependency>
|
||||
<groupId>com.google.inject.extensions</groupId>
|
||||
<artifactId>guice-multibindings</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.curator</groupId>
|
||||
|
@ -478,6 +479,9 @@
|
|||
<plugin>
|
||||
<artifactId>maven-failsafe-plugin</artifactId>
|
||||
<version>3.0.0-M7</version>
|
||||
<configuration>
|
||||
<argLine>--add-opens java.base/java.lang=ALL-UNNAMED</argLine>
|
||||
</configuration>
|
||||
<dependencies>
|
||||
<!-- Required to force Failsafe to use JUnit instead of TestNG.
|
||||
junit47 is required to use test categories. -->
|
||||
|
|
|
@ -99,7 +99,7 @@ public class ITMultiStageQueryWorkerFaultTolerance
|
|||
+ " regionIsoCode\n"
|
||||
+ "FROM TABLE(\n"
|
||||
+ " EXTERN(\n"
|
||||
+ " '{\"type\":\"local\",\"files\":[\"/resources/data/batch_index/json/wikipedia_index_data1.json\",\"/resources/data/batch_index/json/wikipedia_index_data1.json\"]}',\n"
|
||||
+ " '{\"type\":\"local\",\"files\":[\"/resources/data/batch_index/json/wikipedia_index_data1.json\",\"/resources/data/batch_index/json/wikipedia_index_data1.json\",\"/resources/data/batch_index/json/wikipedia_index_data1.json\",\"/resources/data/batch_index/json/wikipedia_index_data1.json\"]}',\n"
|
||||
+ " '{\"type\":\"json\"}',\n"
|
||||
+ " '[{\"type\":\"string\",\"name\":\"timestamp\"},{\"type\":\"string\",\"name\":\"isRobot\"},{\"type\":\"string\",\"name\":\"diffUrl\"},{\"type\":\"long\",\"name\":\"added\"},{\"type\":\"string\",\"name\":\"countryIsoCode\"},{\"type\":\"string\",\"name\":\"regionName\"},{\"type\":\"string\",\"name\":\"channel\"},{\"type\":\"string\",\"name\":\"flags\"},{\"type\":\"long\",\"name\":\"delta\"},{\"type\":\"string\",\"name\":\"isUnpatrolled\"},{\"type\":\"string\",\"name\":\"isNew\"},{\"type\":\"double\",\"name\":\"deltaBucket\"},{\"type\":\"string\",\"name\":\"isMinor\"},{\"type\":\"string\",\"name\":\"isAnonymous\"},{\"type\":\"long\",\"name\":\"deleted\"},{\"type\":\"string\",\"name\":\"cityName\"},{\"type\":\"long\",\"name\":\"metroCode\"},{\"type\":\"string\",\"name\":\"namespace\"},{\"type\":\"string\",\"name\":\"comment\"},{\"type\":\"string\",\"name\":\"page\"},{\"type\":\"long\",\"name\":\"commentLength\"},{\"type\":\"string\",\"name\":\"countryName\"},{\"type\":\"string\",\"name\":\"user\"},{\"type\":\"string\",\"name\":\"regionIsoCode\"}]'\n"
|
||||
+ " )\n"
|
||||
|
@ -139,7 +139,6 @@ public class ITMultiStageQueryWorkerFaultTolerance
|
|||
|
||||
private void killTaskAbruptly(String taskIdToKill)
|
||||
{
|
||||
|
||||
String command = "jps -mlv | grep -i peon | grep -i " + taskIdToKill + " |awk '{print $1}'";
|
||||
|
||||
ITRetryUtil.retryUntil(() -> {
|
||||
|
@ -158,21 +157,17 @@ public class ITMultiStageQueryWorkerFaultTolerance
|
|||
}
|
||||
String pidToKill = stdOut.lhs.trim();
|
||||
if (pidToKill.length() != 0) {
|
||||
LOG.info("Found PID to kill %s", pidToKill);
|
||||
// kill worker after 5 seconds
|
||||
Thread.sleep(5000);
|
||||
LOG.info("Killing pid %s", pidToKill);
|
||||
druidClusterAdminClient.runCommandInMiddleManagerContainer(
|
||||
final Pair<String, String> killResult = druidClusterAdminClient.runCommandInMiddleManagerContainer(
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"kill -9 " + pidToKill
|
||||
);
|
||||
LOG.info(StringUtils.format("Kill command stdout: %s, stderr: %s", killResult.lhs, killResult.rhs));
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}, true, 6000, 50, StringUtils.format("Figuring out PID for task[%s] to kill abruptly", taskIdToKill));
|
||||
|
||||
|
||||
}, true, 2000, 100, StringUtils.format("Figuring out PID for task[%s] to kill abruptly", taskIdToKill));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,6 +2,22 @@
|
|||
{
|
||||
"query": "SELECT __time, isRobot, added, delta, deleted, namespace FROM %%DATASOURCE%%",
|
||||
"expectedResults": [
|
||||
{
|
||||
"__time": 1377910953000,
|
||||
"isRobot": null,
|
||||
"added": 57,
|
||||
"delta": -143,
|
||||
"deleted": 200,
|
||||
"namespace": "article"
|
||||
},
|
||||
{
|
||||
"__time": 1377910953000,
|
||||
"isRobot": null,
|
||||
"added": 57,
|
||||
"delta": -143,
|
||||
"deleted": 200,
|
||||
"namespace": "article"
|
||||
},
|
||||
{
|
||||
"__time": 1377910953000,
|
||||
"isRobot": null,
|
||||
|
@ -34,6 +50,38 @@
|
|||
"deleted": 129,
|
||||
"namespace": "wikipedia"
|
||||
},
|
||||
{
|
||||
"__time": 1377919965000,
|
||||
"isRobot": null,
|
||||
"added": 459,
|
||||
"delta": 330,
|
||||
"deleted": 129,
|
||||
"namespace": "wikipedia"
|
||||
},
|
||||
{
|
||||
"__time": 1377919965000,
|
||||
"isRobot": null,
|
||||
"added": 459,
|
||||
"delta": 330,
|
||||
"deleted": 129,
|
||||
"namespace": "wikipedia"
|
||||
},
|
||||
{
|
||||
"__time": 1377933081000,
|
||||
"isRobot": null,
|
||||
"added": 123,
|
||||
"delta": 111,
|
||||
"deleted": 12,
|
||||
"namespace": "article"
|
||||
},
|
||||
{
|
||||
"__time": 1377933081000,
|
||||
"isRobot": null,
|
||||
"added": 123,
|
||||
"delta": 111,
|
||||
"deleted": 12,
|
||||
"namespace": "article"
|
||||
},
|
||||
{
|
||||
"__time": 1377933081000,
|
||||
"isRobot": null,
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
# This Dockerfile prefers to use the COPY command over ADD.
|
||||
# See: https://phoenixnap.com/kb/docker-add-vs-copy
|
||||
|
||||
ARG JDK_VERSION=8-slim-buster
|
||||
ARG JDK_VERSION=17-slim-buster
|
||||
|
||||
# The FROM image provides Java on top of Debian, and
|
||||
# thus provides bash, apt-get, etc.
|
||||
|
|
|
@ -88,8 +88,7 @@ if your test has the annotation: `@Test(groups = TestNGGroup.BATCH_INDEX)` then
|
|||
* Add `-pl :druid-integration-tests` when re-running integration tests without changes to core modules in between;
|
||||
this skips the up-to-date checks for the whole module dependency tree.
|
||||
|
||||
* Integration tests can also be run with either Java 8 or Java 11 by adding `-Djvm.runtime=#` to the `mvn` command, where `#`
|
||||
can either be 8 or 11.
|
||||
* Integration tests can also be run with a specific Java version by adding `-Djvm.runtime=#` to the `mvn` command (where `#` can be 11, for example).
|
||||
|
||||
* Druid's configuration (using Docker) can be overridden by providing `-Doverride.config.path=<PATH_TO_FILE>`.
|
||||
The file must contain one property per line; each key must start with `druid_` and use snake case.
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
ARG JDK_VERSION=8-slim-buster
|
||||
ARG JDK_VERSION=17-slim-buster
|
||||
FROM openjdk:$JDK_VERSION as druidbase
|
||||
|
||||
# Bundle everything into one script so cleanup can reduce image size.
|
||||
|
|
|
@ -281,6 +281,7 @@
|
|||
<dependency>
|
||||
<groupId>com.google.inject.extensions</groupId>
|
||||
<artifactId>guice-multibindings</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
|
@ -371,7 +372,6 @@
|
|||
<dependency>
|
||||
<groupId>com.google.inject.extensions</groupId>
|
||||
<artifactId>guice-servlet</artifactId>
|
||||
<version>${guice.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
|
@ -772,6 +772,7 @@
|
|||
</property>
|
||||
</properties>
|
||||
<argLine>
|
||||
${jdk.strong.encapsulation.argLine}
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Ddruid.test.config.type=configFile
|
||||
|
|
|
@ -26,7 +26,7 @@ then
|
|||
else
|
||||
echo "\$DRUID_INTEGRATION_TEST_JVM_RUNTIME is set with value ${DRUID_INTEGRATION_TEST_JVM_RUNTIME}"
|
||||
case "${DRUID_INTEGRATION_TEST_JVM_RUNTIME}" in
|
||||
8 | 11 | 17 | 21)
|
||||
11 | 17 | 21)
|
||||
echo "Build druid-cluster with Java $DRUID_INTEGRATION_TEST_JVM_RUNTIME"
|
||||
docker build -t druid/cluster \
|
||||
--build-arg JDK_VERSION=$DRUID_INTEGRATION_TEST_JVM_RUNTIME-slim-buster \
|
||||
|
|
100
licenses.yaml
100
licenses.yaml
|
@ -371,7 +371,7 @@ name: Guice
|
|||
license_category: binary
|
||||
module: java-core
|
||||
license_name: Apache License version 2.0
|
||||
version: 4.1.0
|
||||
version: 4.2.2
|
||||
libraries:
|
||||
- com.google.inject: guice
|
||||
- com.google.inject.extensions: guice-multibindings
|
||||
|
@ -3165,7 +3165,7 @@ libraries:
|
|||
---
|
||||
|
||||
name: Apache Kafka
|
||||
version: 3.6.1
|
||||
version: 3.9.0
|
||||
license_category: binary
|
||||
module: extensions/druid-kafka-indexing-service
|
||||
license_name: Apache License version 2.0
|
||||
|
@ -3174,7 +3174,7 @@ libraries:
|
|||
notices:
|
||||
- kafka-clients: |
|
||||
Apache Kafka
|
||||
Copyright 2023 The Apache Software Foundation.
|
||||
Copyright 2024 The Apache Software Foundation.
|
||||
|
||||
This product includes software developed at
|
||||
The Apache Software Foundation (https://www.apache.org/).
|
||||
|
@ -5125,15 +5125,6 @@ version: 5.2.5
|
|||
|
||||
---
|
||||
|
||||
name: "@druid-toolkit/query"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
license_name: Apache License version 2.0
|
||||
copyright: Imply Data
|
||||
version: 0.22.23
|
||||
|
||||
---
|
||||
|
||||
name: "@emotion/cache"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
|
@ -5224,6 +5215,16 @@ license_file_path: licenses/bin/@emotion-weak-memoize.MIT
|
|||
|
||||
---
|
||||
|
||||
name: "@flatten-js/interval-tree"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
license_name: MIT License
|
||||
copyright: Alex Bol
|
||||
version: 1.1.3
|
||||
license_file_path: licenses/bin/@flatten-js-interval-tree.MIT
|
||||
|
||||
---
|
||||
|
||||
name: "@fontsource/open-sans"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
|
@ -5234,6 +5235,15 @@ license_file_path: licenses/bin/@fontsource-open-sans.OFL
|
|||
|
||||
---
|
||||
|
||||
name: "@internationalized/date"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
license_name: Apache License version 2.0
|
||||
copyright: Adobe
|
||||
version: 3.5.6
|
||||
|
||||
---
|
||||
|
||||
name: "@popperjs/core"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
|
@ -5244,6 +5254,15 @@ license_file_path: licenses/bin/@popperjs-core.MIT
|
|||
|
||||
---
|
||||
|
||||
name: "@swc/helpers"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
license_name: Apache License version 2.0
|
||||
copyright: 강동윤
|
||||
version: 0.5.13
|
||||
|
||||
---
|
||||
|
||||
name: "@types/parse-json"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
|
@ -5404,15 +5423,6 @@ license_file_path: licenses/bin/change-case.MIT
|
|||
|
||||
---
|
||||
|
||||
name: "chronoshift"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
license_name: Apache License version 2.0
|
||||
copyright: Vadim Ogievetsky
|
||||
version: 0.10.0
|
||||
|
||||
---
|
||||
|
||||
name: "classnames"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
|
@ -5702,6 +5712,15 @@ license_file_path: licenses/bin/dot-case.MIT
|
|||
|
||||
---
|
||||
|
||||
name: "druid-query-toolkit"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
license_name: Apache License version 2.0
|
||||
copyright: Imply Data
|
||||
version: 1.0.0
|
||||
|
||||
---
|
||||
|
||||
name: "echarts"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
|
@ -5801,16 +5820,6 @@ license_file_path: licenses/bin/has-flag.MIT
|
|||
|
||||
---
|
||||
|
||||
name: "has-own-prop"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
license_name: MIT License
|
||||
copyright: Sindre Sorhus
|
||||
version: 2.0.0
|
||||
license_file_path: licenses/bin/has-own-prop.MIT
|
||||
|
||||
---
|
||||
|
||||
name: "hasown"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
|
@ -5871,15 +5880,6 @@ license_file_path: licenses/bin/iconv-lite.MIT
|
|||
|
||||
---
|
||||
|
||||
name: "immutable-class"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
license_name: Apache License version 2.0
|
||||
copyright: Vadim Ogievetsky
|
||||
version: 0.11.2
|
||||
|
||||
---
|
||||
|
||||
name: "import-fresh"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
|
@ -6060,26 +6060,6 @@ license_file_path: licenses/bin/mime-types.MIT
|
|||
|
||||
---
|
||||
|
||||
name: "moment-timezone"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
license_name: MIT License
|
||||
copyright: Tim Wood
|
||||
version: 0.5.43
|
||||
license_file_path: licenses/bin/moment-timezone.MIT
|
||||
|
||||
---
|
||||
|
||||
name: "moment"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
license_name: MIT License
|
||||
copyright: Iskren Ivov Chernev
|
||||
version: 2.29.4
|
||||
license_file_path: licenses/bin/moment.MIT
|
||||
|
||||
---
|
||||
|
||||
name: "no-case"
|
||||
license_category: binary
|
||||
module: web-console
|
||||
|
|
144
pom.xml
|
@ -70,13 +70,12 @@
|
|||
</scm>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>1.8</maven.compiler.source>
|
||||
<maven.compiler.target>1.8</maven.compiler.target>
|
||||
<java.version>8</java.version>
|
||||
<java.version>11</java.version>
|
||||
<maven.compiler.release>${java.version}</maven.compiler.release>
|
||||
<project.build.resourceEncoding>UTF-8</project.build.resourceEncoding>
|
||||
<aether.version>0.9.0.M2</aether.version>
|
||||
<apache.curator.version>5.5.0</apache.curator.version>
|
||||
<apache.kafka.version>3.6.1</apache.kafka.version>
|
||||
<apache.kafka.version>3.9.0</apache.kafka.version>
|
||||
<!-- when updating apache ranger, verify the usage of aws-bundle-sdk vs aws-logs-sdk
|
||||
and update as needed in extensions-core/druid-ranger-security/pom.xml -->
|
||||
<apache.ranger.version>2.4.0</apache.ranger.version>
|
||||
|
@ -96,7 +95,7 @@
|
|||
<errorprone.version>2.35.1</errorprone.version>
|
||||
<fastutil.version>8.5.4</fastutil.version>
|
||||
<guava.version>32.0.1-jre</guava.version>
|
||||
<guice.version>4.1.0</guice.version>
|
||||
<guice.version>4.2.2</guice.version>
|
||||
<hamcrest.version>1.3</hamcrest.version>
|
||||
<jetty.version>9.4.56.v20240826</jetty.version>
|
||||
<jersey.version>1.19.4</jersey.version>
|
||||
|
@ -115,10 +114,6 @@
|
|||
<jna-platform.version>5.13.0</jna-platform.version>
|
||||
<hadoop.compile.version>3.3.6</hadoop.compile.version>
|
||||
<mockito.version>5.14.2</mockito.version>
|
||||
<!-- mockito-inline artifact was removed in mockito 5.3 (mockito 5.x is required for Java >17),
|
||||
however it is required in some cases when running against mockito 4.x (mockito 4.x is required for Java <11).
|
||||
We use the following property to pick the proper artifact based on Java version (see pre-java-11 profile) -->
|
||||
<mockito.inline.artifact>core</mockito.inline.artifact>
|
||||
<aws.sdk.version>1.12.638</aws.sdk.version>
|
||||
<caffeine.version>2.8.0</caffeine.version>
|
||||
<jacoco.version>0.8.12</jacoco.version>
|
||||
|
@ -131,7 +126,33 @@
|
|||
<com.google.http.client.apis.version>1.42.3</com.google.http.client.apis.version>
|
||||
<com.google.apis.compute.version>v1-rev20230606-2.0.0</com.google.apis.compute.version>
|
||||
<com.google.cloud.storage.version>2.29.1</com.google.cloud.storage.version>
|
||||
<jdk.strong.encapsulation.argLine><!-- empty placeholder --></jdk.strong.encapsulation.argLine>
|
||||
<jdk.strong.encapsulation.argLine>
|
||||
<!-- Strong encapsulation parameters -->
|
||||
<!-- When updating this list, update all four locations: -->
|
||||
<!-- 1) ForkingTaskRunner#STRONG_ENCAPSULATION_PROPERTIES -->
|
||||
<!-- 2) docs/operations/java.md, "Strong encapsulation" section -->
|
||||
<!-- 3) pom.xml, jdk.strong.encapsulation.argLine (here) -->
|
||||
<!-- 4) examples/bin/run-java script -->
|
||||
|
||||
<!-- required for DataSketches Memory -->
|
||||
--add-exports=java.base/jdk.internal.ref=ALL-UNNAMED
|
||||
--add-exports=java.base/jdk.internal.misc=ALL-UNNAMED
|
||||
--add-opens=java.base/java.nio=ALL-UNNAMED
|
||||
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED
|
||||
--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED
|
||||
|
||||
<!-- required for NativeIO#getfd -->
|
||||
--add-opens=java.base/java.io=ALL-UNNAMED
|
||||
|
||||
<!-- required for Guice -->
|
||||
--add-opens=java.base/java.lang=ALL-UNNAMED
|
||||
|
||||
<!-- required for metrics -->
|
||||
--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED
|
||||
|
||||
<!-- required for certain EqualsVerifier tests (not required in production) -->
|
||||
--add-opens=java.base/java.util=ALL-UNNAMED
|
||||
</jdk.strong.encapsulation.argLine>
|
||||
<jdk.security.manager.allow.argLine><!-- empty placeholder --></jdk.security.manager.allow.argLine>
|
||||
<repoOrgId>maven.org</repoOrgId>
|
||||
<repoOrgName>Maven Central Repository</repoOrgName>
|
||||
|
@ -271,6 +292,16 @@
|
|||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
</repository>
|
||||
|
||||
<!--
|
||||
maven-dependency-plugin:3.1.2 seems to have updated HTTP repository access behavior.
|
||||
We get the following error "Blocked mirror for repositories: [twitter (http://maven.twttr.com, default, releases+snapshots)]"
|
||||
The suggested action step is to add the mirror: https://maven.apache.org/docs/3.8.1/release-notes.html#how-to-fix-when-i-get-a-http-repository-blocked
|
||||
-->
|
||||
<repository>
|
||||
<id>twitter</id>
|
||||
<url>https://maven.twttr.com</url>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
<pluginRepositories>
|
||||
|
@ -1507,13 +1538,13 @@
|
|||
<plugin>
|
||||
<groupId>com.github.spotbugs</groupId>
|
||||
<artifactId>spotbugs-maven-plugin</artifactId>
|
||||
<version>4.2.0</version>
|
||||
<version>4.8.6.6</version>
|
||||
<dependencies>
|
||||
<!-- overwrite dependency on spotbugs if you want to specify the version of spotbugs -->
|
||||
<dependency>
|
||||
<groupId>com.github.spotbugs</groupId>
|
||||
<artifactId>spotbugs</artifactId>
|
||||
<version>4.2.2</version>
|
||||
<version>4.8.6</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<configuration>
|
||||
|
@ -1524,7 +1555,7 @@
|
|||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-pmd-plugin</artifactId>
|
||||
<version>3.16.0</version>
|
||||
<version>3.26.0</version>
|
||||
<configuration>
|
||||
<linkXRef>false</linkXRef> <!-- prevent "Unable to locate Source XRef to link to" warning -->
|
||||
<printFailingErrors>true</printFailingErrors>
|
||||
|
@ -1534,6 +1565,7 @@
|
|||
<excludeRoots>
|
||||
<excludeRoot>target/generated-sources/</excludeRoot>
|
||||
</excludeRoots>
|
||||
<targetJdk>${maven.compiler.release}</targetJdk>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
|
@ -1554,7 +1586,7 @@
|
|||
<bundledSignatures>
|
||||
<!--
|
||||
This will automatically choose the right
|
||||
signatures based on 'maven.compiler.target':
|
||||
signatures based on 'maven.compiler.release':
|
||||
-->
|
||||
<bundledSignature>jdk-unsafe</bundledSignature>
|
||||
</bundledSignatures>
|
||||
|
@ -1622,6 +1654,16 @@
|
|||
<ignore>sun.misc.Unsafe</ignore>
|
||||
<!-- ignore java reflection polymorphic api signatures -->
|
||||
<ignore>java.lang.invoke.MethodHandle</ignore>
|
||||
<!--
|
||||
For the following java.nio.* classes, we get errors like: "Undefined reference: java.nio.ByteBuffer java.nio.ByteBuffer.clear()"
|
||||
GitHub issue: https://github.com/mojohaus/animal-sniffer/issues/4
|
||||
-->
|
||||
<ignore>java.nio.ByteBuffer</ignore>
|
||||
<ignore>java.nio.IntBuffer</ignore>
|
||||
<ignore>java.nio.CharBuffer</ignore>
|
||||
<ignore>java.nio.FloatBuffer</ignore>
|
||||
<ignore>java.nio.DoubleBuffer</ignore>
|
||||
<ignore>java.nio.MappedByteBuffer</ignore>
|
||||
</ignores>
|
||||
</configuration>
|
||||
</execution>
|
||||
|
@ -1830,7 +1872,12 @@
|
|||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-dependency-plugin</artifactId>
|
||||
<version>3.1.1</version>
|
||||
<version>3.3.0</version>
|
||||
<configuration>
|
||||
<ignoredNonTestScopedDependencies>
|
||||
<ignoredNonTestScopedDependency>*</ignoredNonTestScopedDependency>
|
||||
</ignoredNonTestScopedDependencies>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
|
@ -1916,9 +1963,9 @@
|
|||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.11.0</version>
|
||||
<inherited>true</inherited>
|
||||
<configuration>
|
||||
<source>${maven.compiler.source}</source>
|
||||
<target>${maven.compiler.target}</target>
|
||||
<release>${maven.compiler.release}</release>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
|
@ -1931,18 +1978,6 @@
|
|||
</build>
|
||||
|
||||
<profiles>
|
||||
<!-- mockito 5.x dropped support for Java 8, but is necessary to test against Java >17 -->
|
||||
<profile>
|
||||
<id>pre-java-11</id>
|
||||
<activation>
|
||||
<jdk>(,11)</jdk>
|
||||
</activation>
|
||||
<properties>
|
||||
<!-- mockito-inline was removed in mockito 5.3, but is necessary when running against mockito 4.x for Java 8 -->
|
||||
<mockito.version>4.11.0</mockito.version>
|
||||
<mockito.inline.artifact>inline</mockito.inline.artifact>
|
||||
</properties>
|
||||
</profile>
|
||||
<profile>
|
||||
<id>java-12+</id>
|
||||
<activation>
|
||||
|
@ -1956,54 +1991,6 @@
|
|||
</jdk.security.manager.allow.argLine>
|
||||
</properties>
|
||||
</profile>
|
||||
<profile>
|
||||
<id>java-9+</id>
|
||||
<activation>
|
||||
<jdk>[9,)</jdk>
|
||||
</activation>
|
||||
<properties>
|
||||
<jdk.strong.encapsulation.argLine>
|
||||
<!-- Strong encapsulation parameters -->
|
||||
<!-- When updating this list, update all four locations: -->
|
||||
<!-- 1) ForkingTaskRunner#STRONG_ENCAPSULATION_PROPERTIES -->
|
||||
<!-- 2) docs/operations/java.md, "Strong encapsulation" section -->
|
||||
<!-- 3) pom.xml, jdk.strong.encapsulation.argLine (here) -->
|
||||
<!-- 4) examples/bin/run-java script -->
|
||||
|
||||
<!-- required for DataSketches Memory -->
|
||||
--add-exports=java.base/jdk.internal.ref=ALL-UNNAMED
|
||||
--add-exports=java.base/jdk.internal.misc=ALL-UNNAMED
|
||||
--add-opens=java.base/java.nio=ALL-UNNAMED
|
||||
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED
|
||||
--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED
|
||||
|
||||
<!-- required for NativeIO#getfd -->
|
||||
--add-opens=java.base/java.io=ALL-UNNAMED
|
||||
|
||||
<!-- required for Guice -->
|
||||
--add-opens=java.base/java.lang=ALL-UNNAMED
|
||||
|
||||
<!-- required for metrics -->
|
||||
--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED
|
||||
|
||||
<!-- required for certain EqualsVerifier tests (not required in production) -->
|
||||
--add-opens=java.base/java.util=ALL-UNNAMED
|
||||
</jdk.strong.encapsulation.argLine>
|
||||
</properties>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<inherited>true</inherited>
|
||||
<!-- prefer release instead of source/target in JDK 9 and above -->
|
||||
<configuration>
|
||||
<release>${java.version}</release>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
<profile>
|
||||
<id>strict</id>
|
||||
<activation>
|
||||
|
@ -2034,9 +2021,10 @@
|
|||
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.processing=ALL-UNNAMED</arg>
|
||||
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED</arg>
|
||||
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED</arg>
|
||||
<arg>-J--add-exports=java.base/sun.nio.ch=ALL-UNNAMED</arg>
|
||||
<arg>-J--add-opens=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED</arg>
|
||||
<arg>-J--add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED</arg>
|
||||
</compilerArgs>
|
||||
</compilerArgs>
|
||||
<annotationProcessorPaths>
|
||||
<path>
|
||||
<groupId>com.google.errorprone</groupId>
|
||||
|
|
|
@ -106,6 +106,7 @@
|
|||
<dependency>
|
||||
<groupId>com.google.inject.extensions</groupId>
|
||||
<artifactId>guice-multibindings</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.code.findbugs</groupId>
|
||||
|
@ -380,18 +381,12 @@
|
|||
<artifactId>caliper</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- explicitly declare mockito-core dependency to make analyze-dependencies happy when running with Java 8 -->
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-core</artifactId>
|
||||
<version>${mockito.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-${mockito.inline.artifact}</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>nl.jqno.equalsverifier</groupId>
|
||||
<artifactId>equalsverifier</artifactId>
|
||||
|
|
|
@ -59,7 +59,7 @@ NULL : 'null';
|
|||
LONG : [0-9]+;
|
||||
EXP: [eE] [-]? LONG;
|
||||
// DOUBLE provides partial support for java double format
|
||||
// see: https://docs.oracle.com/javase/8/docs/api/java/lang/Double.html#valueOf-java.lang.String-
|
||||
// see: https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/lang/Double.html#valueOf(java.lang.String)
|
||||
DOUBLE : 'NaN' | 'Infinity' | (LONG '.' LONG?) | (LONG EXP) | (LONG '.' LONG? EXP);
|
||||
IDENTIFIER : [_$a-zA-Z][_$a-zA-Z0-9]* | '"' (ESC | ~ [\"\\])* '"';
|
||||
WS : [ \t\r\n]+ -> skip ;
|
||||
|
|
|
@ -49,4 +49,9 @@ public interface BlockingPool<T>
|
|||
* @return count of pending requests
|
||||
*/
|
||||
long getPendingRequests();
|
||||
|
||||
/**
|
||||
* @return number of used buffers from the pool
|
||||
*/
|
||||
long getUsedResourcesCount();
|
||||
}
|
||||
|
|
|
@ -119,7 +119,7 @@ public class DefaultBlockingPool<T> implements BlockingPool<T>
|
|||
throw new RuntimeException(e);
|
||||
}
|
||||
finally {
|
||||
pendingRequests.incrementAndGet();
|
||||
pendingRequests.decrementAndGet();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -129,6 +129,12 @@ public class DefaultBlockingPool<T> implements BlockingPool<T>
|
|||
return pendingRequests.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getUsedResourcesCount()
|
||||
{
|
||||
return maxSize - objects.size();
|
||||
}
|
||||
|
||||
private List<T> pollObjects(int elementNum) throws InterruptedException
|
||||
{
|
||||
final List<T> list = new ArrayList<>(elementNum);
|
||||
|
|
|
@ -61,4 +61,10 @@ public final class DummyBlockingPool<T> implements BlockingPool<T>
|
|||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getUsedResourcesCount()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
|
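The BlockingPool changes above do two things: they add a getUsedResourcesCount() gauge next to getPendingRequests(), and they correct the finally block in DefaultBlockingPool so a completed take decrements the pending counter instead of incrementing it a second time. Below is a minimal, self-contained sketch of that bookkeeping; SimplePool is a hypothetical stand-in, not Druid's class, but it derives the used count the same way DefaultBlockingPool now does (capacity minus objects still sitting in the pool). DummyBlockingPool, which holds no real resources, simply reports 0 for the new gauge.

import java.util.ArrayDeque;
import java.util.Queue;
import java.util.concurrent.atomic.AtomicLong;

// Hypothetical illustration of the two BlockingPool gauges; not Druid code.
final class SimplePool<T>
{
  private final Queue<T> objects = new ArrayDeque<>();
  private final AtomicLong pendingRequests = new AtomicLong();
  private final int maxSize;

  SimplePool(int maxSize)
  {
    this.maxSize = maxSize;
  }

  void offer(T object)
  {
    objects.add(object);
  }

  T take()
  {
    pendingRequests.incrementAndGet();    // a request is now waiting
    try {
      return objects.poll();              // stand-in for the real blocking take
    }
    finally {
      pendingRequests.decrementAndGet();  // the request is no longer pending (the fixed line)
    }
  }

  long getPendingRequests()
  {
    return pendingRequests.get();
  }

  long getUsedResourcesCount()
  {
    // checked-out buffers = capacity minus what is still available in the pool
    return maxSize - objects.size();
  }
}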
@ -42,7 +42,7 @@ import java.security.spec.KeySpec;
|
|||
* using javax.crypto package.
|
||||
*
|
||||
* To learn about possible algorithms supported and their names,
|
||||
* See https://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html
|
||||
* See https://docs.oracle.com/en/java/javase/11/docs/specs/security/standard-names.html
|
||||
*/
|
||||
public class CryptoService
|
||||
{
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.druid.java.util.common.granularity.Granularity;
|
|||
import org.apache.druid.query.OrderBy;
|
||||
import org.apache.druid.query.aggregation.AggregatorFactory;
|
||||
import org.apache.druid.segment.AggregateProjectionMetadata;
|
||||
import org.apache.druid.segment.Cursors;
|
||||
import org.apache.druid.segment.VirtualColumn;
|
||||
import org.apache.druid.segment.VirtualColumns;
|
||||
import org.apache.druid.segment.column.ColumnHolder;
|
||||
|
@ -39,6 +40,7 @@ import org.apache.druid.utils.CollectionUtils;
|
|||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
@ -72,10 +74,10 @@ public class AggregateProjectionSpec
|
|||
)
|
||||
{
|
||||
this.name = name;
|
||||
if (CollectionUtils.isNullOrEmpty(groupingColumns)) {
|
||||
throw InvalidInput.exception("groupingColumns must not be null or empty");
|
||||
if (CollectionUtils.isNullOrEmpty(groupingColumns) && (aggregators == null || aggregators.length == 0)) {
|
||||
throw InvalidInput.exception("groupingColumns and aggregators must not both be null or empty");
|
||||
}
|
||||
this.groupingColumns = groupingColumns;
|
||||
this.groupingColumns = groupingColumns == null ? Collections.emptyList() : groupingColumns;
|
||||
this.virtualColumns = virtualColumns == null ? VirtualColumns.EMPTY : virtualColumns;
|
||||
// in the future this should be expanded to support user specified ordering, but for now we compute it based on
|
||||
// the grouping columns, which is consistent with how rollup ordering works for incremental index base table
|
||||
|
@ -169,6 +171,10 @@ public class AggregateProjectionSpec
|
|||
|
||||
private static ProjectionOrdering computeOrdering(VirtualColumns virtualColumns, List<DimensionSchema> groupingColumns)
|
||||
{
|
||||
if (groupingColumns.isEmpty()) {
|
||||
// call it time ordered; there are no grouping columns, so there is only 1 row for this projection
|
||||
return new ProjectionOrdering(Cursors.ascendingTimeOrder(), null);
|
||||
}
|
||||
final List<OrderBy> ordering = Lists.newArrayListWithCapacity(groupingColumns.size());
|
||||
|
||||
String timeColumnName = null;
|
||||
|
|
|
@ -808,7 +808,7 @@ public class ParallelMergeCombiningSequence<T> extends YieldingSequenceBase<T>
|
|||
/**
|
||||
* {@link ForkJoinPool} friendly {@link BlockingQueue} feeder, adapted from 'QueueTaker' of Java documentation on
|
||||
* {@link ForkJoinPool.ManagedBlocker},
|
||||
* https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/ForkJoinPool.ManagedBlocker.html
|
||||
* https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/concurrent/ForkJoinPool.ManagedBlocker.html
|
||||
*/
|
||||
static class QueuePusher<E> implements ForkJoinPool.ManagedBlocker
|
||||
{
|
||||
|
|
|
@ -661,4 +661,13 @@ public class QueryContext
|
|||
"context=" + context +
|
||||
'}';
|
||||
}
|
||||
|
||||
public boolean isDecoupledMode()
|
||||
{
|
||||
String value = getString(
|
||||
QueryContexts.CTX_NATIVE_QUERY_SQL_PLANNING_MODE,
|
||||
QueryContexts.NATIVE_QUERY_SQL_PLANNING_MODE_COUPLED
|
||||
);
|
||||
return QueryContexts.NATIVE_QUERY_SQL_PLANNING_MODE_DECOUPLED.equals(value);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.Numbers;
|
|||
import org.apache.druid.java.util.common.StringUtils;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
@ -105,6 +106,10 @@ public class QueryContexts
|
|||
// SQL statement resource specific keys
|
||||
public static final String CTX_EXECUTION_MODE = "executionMode";
|
||||
|
||||
public static final String CTX_NATIVE_QUERY_SQL_PLANNING_MODE = "plannerStrategy";
|
||||
public static final String NATIVE_QUERY_SQL_PLANNING_MODE_COUPLED = "COUPLED";
|
||||
public static final String NATIVE_QUERY_SQL_PLANNING_MODE_DECOUPLED = "DECOUPLED";
|
||||
|
||||
// Defaults
|
||||
public static final boolean DEFAULT_BY_SEGMENT = false;
|
||||
public static final boolean DEFAULT_POPULATE_CACHE = true;
|
||||
|
|
|
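Taken together, the two context changes above let a SQL query opt into decoupled planning by setting the plannerStrategy key, and isDecoupledMode() treats a missing key as COUPLED so existing queries keep the old planner path. The standalone sketch below models that lookup with a plain Map; PlannerStrategyCheck is hypothetical, while the key name and the COUPLED/DECOUPLED values come from the constants added to QueryContexts.

import java.util.Map;

// Hypothetical standalone model of the new isDecoupledMode() check; not Druid code.
final class PlannerStrategyCheck
{
  static final String CTX_KEY = "plannerStrategy";  // CTX_NATIVE_QUERY_SQL_PLANNING_MODE
  static final String COUPLED = "COUPLED";
  static final String DECOUPLED = "DECOUPLED";

  static boolean isDecoupledMode(Map<String, Object> queryContext)
  {
    // a missing key falls back to COUPLED, keeping the existing planner path as the default
    Object value = queryContext.getOrDefault(CTX_KEY, COUPLED);
    return DECOUPLED.equals(value);
  }

  public static void main(String[] args)
  {
    System.out.println(isDecoupledMode(Map.of()));                      // false
    System.out.println(isDecoupledMode(Map.of(CTX_KEY, "DECOUPLED")));  // true
  }
}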
@ -198,4 +198,9 @@ public class UnionDataSource implements DataSource
|
|||
"dataSources=" + dataSources +
|
||||
'}';
|
||||
}
|
||||
|
||||
public static boolean isCompatibleDataSource(DataSource dataSource)
|
||||
{
|
||||
return (dataSource instanceof TableDataSource || dataSource instanceof InlineDataSource);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,18 +19,17 @@
|
|||
|
||||
package org.apache.druid.query.aggregation;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
||||
import org.apache.druid.error.DruidException;
|
||||
import org.apache.druid.segment.serde.cell.IOIterator;
|
||||
import org.apache.druid.segment.serde.cell.IntSerializer;
|
||||
import org.apache.druid.segment.serde.cell.StagedSerde;
|
||||
import org.apache.druid.segment.writeout.WriteOutBytes;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.IntBuffer;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
/**
|
||||
|
@ -45,109 +44,181 @@ public class SerializedStorage<T>
|
|||
{
|
||||
private final WriteOutBytes writeOutBytes;
|
||||
private final StagedSerde<T> serde;
|
||||
private final IntSerializer intSerializer = new IntSerializer();
|
||||
private final ByteBuffer itemOffsetsBytes;
|
||||
private final IntBuffer itemSizes;
|
||||
|
||||
private final LongArrayList rowChunkOffsets = new LongArrayList();
|
||||
private int numStored = 0;
|
||||
private int maxSize = 0;
|
||||
|
||||
public SerializedStorage(WriteOutBytes writeOutBytes, StagedSerde<T> serde)
|
||||
{
|
||||
this(writeOutBytes, serde, 4096);
|
||||
}
|
||||
|
||||
public SerializedStorage(WriteOutBytes writeOutBytes, StagedSerde<T> serde, int chunkSize)
|
||||
{
|
||||
this.writeOutBytes = writeOutBytes;
|
||||
this.serde = serde;
|
||||
|
||||
this.itemOffsetsBytes = ByteBuffer.allocate(chunkSize * Integer.BYTES).order(ByteOrder.nativeOrder());
|
||||
this.itemSizes = itemOffsetsBytes.asIntBuffer();
|
||||
}
|
||||
|
||||
public void store(@Nullable T value) throws IOException
|
||||
{
|
||||
byte[] bytes = serde.serialize(value);
|
||||
|
||||
writeOutBytes.write(intSerializer.serialize(bytes.length));
|
||||
writeOutBytes.write(bytes);
|
||||
maxSize = Math.max(maxSize, bytes.length);
|
||||
itemSizes.put(bytes.length);
|
||||
if (bytes.length > 0) {
|
||||
writeOutBytes.write(bytes);
|
||||
}
|
||||
|
||||
++numStored;
|
||||
if (itemSizes.remaining() == 0) {
|
||||
rowChunkOffsets.add(writeOutBytes.size());
|
||||
writeOutBytes.write(itemOffsetsBytes);
|
||||
itemOffsetsBytes.clear();
|
||||
itemSizes.clear();
|
||||
}
|
||||
}
|
||||
|
||||
public int numStored()
|
||||
{
|
||||
return numStored;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates an iterator over everything that has been stored. Also signifies the end of storing objects.
|
||||
* iterator() can be called multiple times if needed, but after iterator() is called, store() can no longer be
|
||||
* called.
|
||||
*
|
||||
* @return an iterator
|
||||
* @throws IOException on failure
|
||||
*/
|
||||
public IOIterator<T> iterator() throws IOException
|
||||
{
|
||||
return new DeserializingIOIterator<>(writeOutBytes.asInputStream(), serde);
|
||||
if (itemSizes.position() != itemSizes.limit()) {
|
||||
rowChunkOffsets.add(writeOutBytes.size());
|
||||
itemOffsetsBytes.limit(itemSizes.position() * Integer.BYTES);
|
||||
writeOutBytes.write(itemOffsetsBytes);
|
||||
|
||||
// Move the limit to the position so that we fail subsequent writes and indicate that we are done
|
||||
itemSizes.limit(itemSizes.position());
|
||||
}
|
||||
|
||||
return new DeserializingIOIterator<>(
|
||||
writeOutBytes,
|
||||
rowChunkOffsets,
|
||||
numStored,
|
||||
itemSizes.capacity(),
|
||||
maxSize,
|
||||
serde
|
||||
);
|
||||
}
|
||||
|
||||
private static class DeserializingIOIterator<T> implements IOIterator<T>
|
||||
{
|
||||
private static final int NEEDS_READ = -2;
|
||||
private static final int EOF = -1;
|
||||
private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0).asReadOnlyBuffer();
|
||||
|
||||
private final byte[] intBytes;
|
||||
private final BufferedInputStream inputStream;
|
||||
private final WriteOutBytes medium;
|
||||
private final LongArrayList rowChunkOffsets;
|
||||
private final int numEntries;
|
||||
private ByteBuffer tmpBuf;
|
||||
private final StagedSerde<T> serde;
|
||||
|
||||
private int nextSize;
|
||||
private final ByteBuffer itemOffsetsBytes;
|
||||
private final int[] itemSizes;
|
||||
|
||||
public DeserializingIOIterator(InputStream inputStream, StagedSerde<T> serde)
|
||||
private long itemStartOffset;
|
||||
private int chunkId = 0;
|
||||
private int currId = 0;
|
||||
private int itemIndex;
|
||||
|
||||
public DeserializingIOIterator(
|
||||
WriteOutBytes medium,
|
||||
LongArrayList rowChunkOffsets,
|
||||
int numEntries,
|
||||
int chunkSize,
|
||||
int maxSize,
|
||||
StagedSerde<T> serde
|
||||
)
|
||||
{
|
||||
this.inputStream = new BufferedInputStream(inputStream);
|
||||
this.medium = medium;
|
||||
this.rowChunkOffsets = rowChunkOffsets;
|
||||
this.numEntries = numEntries;
|
||||
this.tmpBuf = ByteBuffer.allocate(maxSize).order(ByteOrder.nativeOrder());
|
||||
this.serde = serde;
|
||||
intBytes = new byte[Integer.BYTES];
|
||||
nextSize = NEEDS_READ;
|
||||
|
||||
this.itemOffsetsBytes = ByteBuffer.allocate(chunkSize * Integer.BYTES).order(ByteOrder.nativeOrder());
|
||||
this.itemSizes = new int[chunkSize];
|
||||
this.itemIndex = chunkSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() throws IOException
|
||||
public boolean hasNext()
|
||||
{
|
||||
return getNextSize() > EOF;
|
||||
return currId < numEntries;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T next() throws IOException
|
||||
{
|
||||
int currentNextSize = getNextSize();
|
||||
|
||||
if (currentNextSize == -1) {
|
||||
throw new NoSuchElementException("end of buffer reached");
|
||||
if (currId >= numEntries) {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
|
||||
byte[] nextBytes = new byte[currentNextSize];
|
||||
int bytesRead = 0;
|
||||
|
||||
while (bytesRead < currentNextSize) {
|
||||
int result = inputStream.read(nextBytes, bytesRead, currentNextSize - bytesRead);
|
||||
|
||||
if (result == -1) {
|
||||
throw new NoSuchElementException("unexpected end of buffer reached");
|
||||
}
|
||||
|
||||
bytesRead += result;
|
||||
}
|
||||
|
||||
Preconditions.checkState(bytesRead == currentNextSize);
|
||||
T value = serde.deserialize(nextBytes);
|
||||
|
||||
nextSize = NEEDS_READ;
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
private int getNextSize() throws IOException
|
||||
{
|
||||
if (nextSize == NEEDS_READ) {
|
||||
int bytesRead = 0;
|
||||
|
||||
while (bytesRead < Integer.BYTES) {
|
||||
int result = inputStream.read(intBytes, bytesRead, Integer.BYTES - bytesRead);
|
||||
|
||||
if (result == -1) {
|
||||
nextSize = EOF;
|
||||
return EOF;
|
||||
} else {
|
||||
bytesRead += result;
|
||||
if (itemIndex >= itemSizes.length) {
|
||||
if (chunkId == 0) {
|
||||
itemStartOffset = 0;
|
||||
} else {
|
||||
if (itemStartOffset != rowChunkOffsets.getLong(chunkId - 1)) {
|
||||
throw DruidException.defensive(
|
||||
"Should have read up to the start of the offsets [%,d], "
|
||||
+ "but for some reason the values [%,d] don't align. Possible corruption?",
|
||||
rowChunkOffsets.getLong(chunkId - 1),
|
||||
itemStartOffset
|
||||
);
|
||||
}
|
||||
itemStartOffset += (((long) itemSizes.length) * Integer.BYTES);
|
||||
}
|
||||
Preconditions.checkState(bytesRead == Integer.BYTES);
|
||||
|
||||
nextSize = ByteBuffer.wrap(intBytes).order(ByteOrder.nativeOrder()).getInt();
|
||||
int numToRead = Math.min(itemSizes.length, numEntries - (chunkId * itemSizes.length));
|
||||
final long readOffset = rowChunkOffsets.getLong(chunkId++);
|
||||
itemOffsetsBytes.clear();
|
||||
itemOffsetsBytes.limit(numToRead * Integer.BYTES);
|
||||
medium.readFully(readOffset, itemOffsetsBytes);
|
||||
itemOffsetsBytes.flip();
|
||||
itemOffsetsBytes.asIntBuffer().get(itemSizes, 0, numToRead);
|
||||
|
||||
itemIndex = 0;
|
||||
}
|
||||
|
||||
return nextSize;
|
||||
int bytesToRead = itemSizes[itemIndex];
|
||||
final T retVal;
|
||||
if (bytesToRead == 0) {
|
||||
retVal = serde.deserialize(EMPTY_BUFFER);
|
||||
} else {
|
||||
tmpBuf.clear();
|
||||
tmpBuf.limit(bytesToRead);
|
||||
medium.readFully(itemStartOffset, tmpBuf);
|
||||
tmpBuf.flip();
|
||||
|
||||
retVal = serde.deserialize(tmpBuf);
|
||||
}
|
||||
|
||||
itemStartOffset += bytesToRead;
|
||||
++itemIndex;
|
||||
++currId;
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException
|
||||
public void close()
|
||||
{
|
||||
inputStream.close();
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
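The SerializedStorage rewrite above changes the storage layout from a pure stream of length-prefixed values to a chunked one: every value is still written with a 4-byte length prefix, but the lengths are also collected in an in-memory chunk (4096 entries by default), and whenever a chunk fills, the current write position is recorded in rowChunkOffsets and the block of lengths is appended. iterator() flushes the final partial chunk and then reads values back with positional readFully calls into a reusable buffer sized to the largest item, instead of streaming through a BufferedInputStream. The sketch below models only the write-side layout; ChunkedLayoutDemo is hypothetical and uses a ByteArrayOutputStream in place of Druid's WriteOutBytes.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.List;

// Hypothetical model of the chunked layout written by SerializedStorage; not Druid code.
final class ChunkedLayoutDemo
{
  public static void main(String[] args) throws IOException
  {
    final int chunkSize = 4;                                        // the real default is 4096 entries per chunk
    final ByteArrayOutputStream out = new ByteArrayOutputStream();  // stand-in for WriteOutBytes
    final ByteBuffer sizes = ByteBuffer.allocate(chunkSize * Integer.BYTES).order(ByteOrder.nativeOrder());
    final List<Long> rowChunkOffsets = new ArrayList<>();

    byte[][] values = {"a".getBytes(), "bb".getBytes(), new byte[0], "cccc".getBytes(), "dd".getBytes()};
    for (byte[] value : values) {
      // 1) length prefix followed by the payload, exactly as before
      out.write(ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.nativeOrder()).putInt(value.length).array());
      if (value.length > 0) {
        out.write(value);
      }
      // 2) also remember the length in the current chunk
      sizes.putInt(value.length);
      if (!sizes.hasRemaining()) {
        // chunk full: note where the size block starts, then append it
        rowChunkOffsets.add((long) out.size());
        out.write(sizes.array(), 0, sizes.position());
        sizes.clear();
      }
    }
    // iterator() flushes the final, partially filled chunk the same way
    if (sizes.position() > 0) {
      rowChunkOffsets.add((long) out.size());
      out.write(sizes.array(), 0, sizes.position());
    }
    System.out.println("bytes written = " + out.size() + ", size-block offsets = " + rowChunkOffsets);
  }
}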
@ -100,6 +100,7 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
|
|||
private final GroupByQueryConfig queryConfig;
|
||||
private final GroupByQueryMetricsFactory queryMetricsFactory;
|
||||
private final GroupByResourcesReservationPool groupByResourcesReservationPool;
|
||||
private final GroupByStatsProvider groupByStatsProvider;
|
||||
|
||||
@VisibleForTesting
|
||||
public GroupByQueryQueryToolChest(
|
||||
|
@ -111,7 +112,24 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
|
|||
groupingEngine,
|
||||
GroupByQueryConfig::new,
|
||||
DefaultGroupByQueryMetricsFactory.instance(),
|
||||
groupByResourcesReservationPool
|
||||
groupByResourcesReservationPool,
|
||||
new GroupByStatsProvider()
|
||||
);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public GroupByQueryQueryToolChest(
|
||||
GroupingEngine groupingEngine,
|
||||
GroupByResourcesReservationPool groupByResourcesReservationPool,
|
||||
GroupByStatsProvider groupByStatsProvider
|
||||
)
|
||||
{
|
||||
this(
|
||||
groupingEngine,
|
||||
GroupByQueryConfig::new,
|
||||
DefaultGroupByQueryMetricsFactory.instance(),
|
||||
groupByResourcesReservationPool,
|
||||
groupByStatsProvider
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -120,13 +138,15 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
|
|||
GroupingEngine groupingEngine,
|
||||
Supplier<GroupByQueryConfig> queryConfigSupplier,
|
||||
GroupByQueryMetricsFactory queryMetricsFactory,
|
||||
@Merging GroupByResourcesReservationPool groupByResourcesReservationPool
|
||||
@Merging GroupByResourcesReservationPool groupByResourcesReservationPool,
|
||||
GroupByStatsProvider groupByStatsProvider
|
||||
)
|
||||
{
|
||||
this.groupingEngine = groupingEngine;
|
||||
this.queryConfig = queryConfigSupplier.get();
|
||||
this.queryMetricsFactory = queryMetricsFactory;
|
||||
this.groupByResourcesReservationPool = groupByResourcesReservationPool;
|
||||
this.groupByStatsProvider = groupByStatsProvider;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -170,7 +190,15 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
|
|||
{
|
||||
// Reserve the group by resources (merge buffers) required for executing the query
|
||||
final QueryResourceId queryResourceId = query.context().getQueryResourceId();
|
||||
groupByResourcesReservationPool.reserve(queryResourceId, query, willMergeRunner);
|
||||
final GroupByStatsProvider.PerQueryStats perQueryStats =
|
||||
groupByStatsProvider.getPerQueryStatsContainer(query.context().getQueryResourceId());
|
||||
|
||||
groupByResourcesReservationPool.reserve(
|
||||
queryResourceId,
|
||||
query,
|
||||
willMergeRunner,
|
||||
perQueryStats
|
||||
);
|
||||
|
||||
final GroupByQueryResources resource = groupByResourcesReservationPool.fetch(queryResourceId);
|
||||
if (resource == null) {
|
||||
|
@ -180,16 +208,20 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
|
|||
);
|
||||
}
|
||||
try {
|
||||
Closer closer = Closer.create();
|
||||
|
||||
final Sequence<ResultRow> mergedSequence = mergeGroupByResults(
|
||||
query,
|
||||
resource,
|
||||
runner,
|
||||
context
|
||||
context,
|
||||
closer,
|
||||
perQueryStats
|
||||
);
|
||||
Closer closer = Closer.create();
|
||||
|
||||
// Clean up the resources reserved during the execution of the query
|
||||
closer.register(() -> groupByResourcesReservationPool.clean(queryResourceId));
|
||||
closer.register(() -> groupByStatsProvider.closeQuery(query.context().getQueryResourceId()));
|
||||
return Sequences.withBaggage(mergedSequence, closer);
|
||||
}
|
||||
catch (Exception e) {
|
||||
|
@ -203,20 +235,24 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
|
|||
final GroupByQuery query,
|
||||
GroupByQueryResources resource,
|
||||
QueryRunner<ResultRow> runner,
|
||||
ResponseContext context
|
||||
ResponseContext context,
|
||||
Closer closer,
|
||||
GroupByStatsProvider.PerQueryStats perQueryStats
|
||||
)
|
||||
{
|
||||
if (isNestedQueryPushDown(query)) {
|
||||
return mergeResultsWithNestedQueryPushDown(query, resource, runner, context);
|
||||
return mergeResultsWithNestedQueryPushDown(query, resource, runner, context, perQueryStats);
|
||||
}
|
||||
return mergeGroupByResultsWithoutPushDown(query, resource, runner, context);
|
||||
return mergeGroupByResultsWithoutPushDown(query, resource, runner, context, closer, perQueryStats);
|
||||
}
|
||||
|
||||
private Sequence<ResultRow> mergeGroupByResultsWithoutPushDown(
|
||||
GroupByQuery query,
|
||||
GroupByQueryResources resource,
|
||||
QueryRunner<ResultRow> runner,
|
||||
ResponseContext context
|
||||
ResponseContext context,
|
||||
Closer closer,
|
||||
GroupByStatsProvider.PerQueryStats perQueryStats
|
||||
)
|
||||
{
|
||||
// If there's a subquery, merge subquery results and then apply the aggregator
|
||||
|
@ -241,6 +277,8 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
|
|||
}
|
||||
subqueryContext.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, false);
|
||||
subquery = (GroupByQuery) ((QueryDataSource) dataSource).getQuery().withOverriddenContext(subqueryContext);
|
||||
|
||||
closer.register(() -> groupByStatsProvider.closeQuery(subquery.context().getQueryResourceId()));
|
||||
}
|
||||
catch (ClassCastException e) {
|
||||
throw new UnsupportedOperationException("Subqueries must be of type 'group by'");
|
||||
|
@ -250,7 +288,9 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
|
|||
subquery,
|
||||
resource,
|
||||
runner,
|
||||
context
|
||||
context,
|
||||
closer,
|
||||
perQueryStats
|
||||
);
|
||||
|
||||
final Sequence<ResultRow> finalizingResults = finalizeSubqueryResults(subqueryResult, subquery);
|
||||
|
@ -259,7 +299,14 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
|
|||
return groupingEngine.processSubtotalsSpec(
|
||||
query,
|
||||
resource,
|
||||
groupingEngine.processSubqueryResult(subquery, query, resource, finalizingResults, false)
|
||||
groupingEngine.processSubqueryResult(
|
||||
subquery,
|
||||
query, resource,
|
||||
finalizingResults,
|
||||
false,
|
||||
perQueryStats
|
||||
),
|
||||
perQueryStats
|
||||
);
|
||||
} else {
|
||||
return groupingEngine.applyPostProcessing(
|
||||
|
@ -268,7 +315,8 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
|
|||
query,
|
||||
resource,
|
||||
finalizingResults,
|
||||
false
|
||||
false,
|
||||
perQueryStats
|
||||
),
|
||||
query
|
||||
);
|
||||
|
@ -279,7 +327,8 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
|
|||
return groupingEngine.processSubtotalsSpec(
|
||||
query,
|
||||
resource,
|
||||
groupingEngine.mergeResults(runner, query.withSubtotalsSpec(null), context)
|
||||
groupingEngine.mergeResults(runner, query.withSubtotalsSpec(null), context),
|
||||
perQueryStats
|
||||
);
|
||||
} else {
|
||||
return groupingEngine.applyPostProcessing(groupingEngine.mergeResults(runner, query, context), query);
|
||||
|
@ -291,7 +340,8 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
|
|||
GroupByQuery query,
|
||||
GroupByQueryResources resource,
|
||||
QueryRunner<ResultRow> runner,
|
||||
ResponseContext context
|
||||
ResponseContext context,
|
||||
GroupByStatsProvider.PerQueryStats perQueryStats
|
||||
)
|
||||
{
|
||||
Sequence<ResultRow> pushDownQueryResults = groupingEngine.mergeResults(runner, query, context);
|
||||
|
@ -303,7 +353,8 @@ public class GroupByQueryQueryToolChest extends QueryToolChest<ResultRow, GroupB
|
|||
rewrittenQuery,
|
||||
resource,
|
||||
finalizedResults,
|
||||
true
|
||||
true,
|
||||
perQueryStats
|
||||
),
|
||||
query
|
||||
);
|
||||
|
|
|
@ -86,17 +86,17 @@ public class GroupByResourcesReservationPool
|
|||
/**
|
||||
* Map of query's resource id -> group by resources reserved for the query to execute
|
||||
*/
|
||||
final ConcurrentHashMap<QueryResourceId, AtomicReference<GroupByQueryResources>> pool = new ConcurrentHashMap<>();
|
||||
private final ConcurrentHashMap<QueryResourceId, AtomicReference<GroupByQueryResources>> pool = new ConcurrentHashMap<>();
|
||||
|
||||
/**
|
||||
* Buffer pool from where the merge buffers are picked and reserved
|
||||
*/
|
||||
final BlockingPool<ByteBuffer> mergeBufferPool;
|
||||
private final BlockingPool<ByteBuffer> mergeBufferPool;
|
||||
|
||||
/**
|
||||
* Group by query config of the server
|
||||
*/
|
||||
final GroupByQueryConfig groupByQueryConfig;
|
||||
private final GroupByQueryConfig groupByQueryConfig;
|
||||
|
||||
@Inject
|
||||
public GroupByResourcesReservationPool(
|
||||
|
@ -112,8 +112,14 @@ public class GroupByResourcesReservationPool
|
|||
* Reserves appropriate resources, and maps it to the queryResourceId (usually the query's resource id) in the internal map.
|
||||
* This is a blocking call, and can block up to the given query's timeout
|
||||
*/
|
||||
public void reserve(QueryResourceId queryResourceId, GroupByQuery groupByQuery, boolean willMergeRunner)
|
||||
public void reserve(
|
||||
QueryResourceId queryResourceId,
|
||||
GroupByQuery groupByQuery,
|
||||
boolean willMergeRunner,
|
||||
GroupByStatsProvider.PerQueryStats perQueryStats
|
||||
)
|
||||
{
|
||||
long startNs = System.nanoTime();
|
||||
if (queryResourceId == null) {
|
||||
throw DruidException.defensive("Query resource id must be populated");
|
||||
}
|
||||
|
@ -145,6 +151,8 @@ public class GroupByResourcesReservationPool
|
|||
// Resources have been allocated, spot has been reserved. The reference would ALWAYS refer to 'null'. Refer the
|
||||
// allocated resources from it
|
||||
reference.compareAndSet(null, resources);
|
||||
|
||||
perQueryStats.mergeBufferAcquisitionTime(System.nanoTime() - startNs);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -0,0 +1,190 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.query.groupby;
|
||||
|
||||
import org.apache.druid.guice.LazySingleton;
|
||||
import org.apache.druid.query.QueryResourceId;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
/**
|
||||
* Metrics collector for groupBy queries, covering spilled bytes, merge buffer acquisition time, and dictionary size.
|
||||
*/
|
||||
@LazySingleton
|
||||
public class GroupByStatsProvider
|
||||
{
|
||||
private final Map<QueryResourceId, PerQueryStats> perQueryStats;
|
||||
private final AggregateStats aggregateStatsContainer;
|
||||
|
||||
public GroupByStatsProvider()
|
||||
{
|
||||
this.perQueryStats = new ConcurrentHashMap<>();
|
||||
this.aggregateStatsContainer = new AggregateStats();
|
||||
}
|
||||
|
||||
public PerQueryStats getPerQueryStatsContainer(QueryResourceId resourceId)
|
||||
{
|
||||
if (resourceId == null) {
|
||||
return null;
|
||||
}
|
||||
return perQueryStats.computeIfAbsent(resourceId, value -> new PerQueryStats());
|
||||
}
|
||||
|
||||
public synchronized void closeQuery(QueryResourceId resourceId)
|
||||
{
|
||||
if (resourceId == null || !perQueryStats.containsKey(resourceId)) {
|
||||
return;
|
||||
}
|
||||
PerQueryStats container = perQueryStats.remove(resourceId);
|
||||
aggregateStatsContainer.addQueryStats(container);
|
||||
}
|
||||
|
||||
public synchronized AggregateStats getStatsSince()
|
||||
{
|
||||
return aggregateStatsContainer.reset();
|
||||
}
|
||||
|
||||
public static class AggregateStats
|
||||
{
|
||||
private long mergeBufferQueries = 0;
|
||||
private long mergeBufferAcquisitionTimeNs = 0;
|
||||
private long spilledQueries = 0;
|
||||
private long spilledBytes = 0;
|
||||
private long mergeDictionarySize = 0;
|
||||
|
||||
public AggregateStats()
|
||||
{
|
||||
}
|
||||
|
||||
public AggregateStats(
|
||||
long mergeBufferQueries,
|
||||
long mergeBufferAcquisitionTimeNs,
|
||||
long spilledQueries,
|
||||
long spilledBytes,
|
||||
long mergeDictionarySize
|
||||
)
|
||||
{
|
||||
this.mergeBufferQueries = mergeBufferQueries;
|
||||
this.mergeBufferAcquisitionTimeNs = mergeBufferAcquisitionTimeNs;
|
||||
this.spilledQueries = spilledQueries;
|
||||
this.spilledBytes = spilledBytes;
|
||||
this.mergeDictionarySize = mergeDictionarySize;
|
||||
}
|
||||
|
||||
public long getMergeBufferQueries()
|
||||
{
|
||||
return mergeBufferQueries;
|
||||
}
|
||||
|
||||
public long getMergeBufferAcquisitionTimeNs()
|
||||
{
|
||||
return mergeBufferAcquisitionTimeNs;
|
||||
}
|
||||
|
||||
public long getSpilledQueries()
|
||||
{
|
||||
return spilledQueries;
|
||||
}
|
||||
|
||||
public long getSpilledBytes()
|
||||
{
|
||||
return spilledBytes;
|
||||
}
|
||||
|
||||
public long getMergeDictionarySize()
|
||||
{
|
||||
return mergeDictionarySize;
|
||||
}
|
||||
|
||||
public void addQueryStats(PerQueryStats perQueryStats)
|
||||
{
|
||||
if (perQueryStats.getMergeBufferAcquisitionTimeNs() > 0) {
|
||||
mergeBufferQueries++;
|
||||
mergeBufferAcquisitionTimeNs += perQueryStats.getMergeBufferAcquisitionTimeNs();
|
||||
}
|
||||
|
||||
if (perQueryStats.getSpilledBytes() > 0) {
|
||||
spilledQueries++;
|
||||
spilledBytes += perQueryStats.getSpilledBytes();
|
||||
}
|
||||
|
||||
mergeDictionarySize += perQueryStats.getMergeDictionarySize();
|
||||
}
|
||||
|
||||
public AggregateStats reset()
|
||||
{
|
||||
AggregateStats aggregateStats =
|
||||
new AggregateStats(
|
||||
mergeBufferQueries,
|
||||
mergeBufferAcquisitionTimeNs,
|
||||
spilledQueries,
|
||||
spilledBytes,
|
||||
mergeDictionarySize
|
||||
);
|
||||
|
||||
this.mergeBufferQueries = 0;
|
||||
this.mergeBufferAcquisitionTimeNs = 0;
|
||||
this.spilledQueries = 0;
|
||||
this.spilledBytes = 0;
|
||||
this.mergeDictionarySize = 0;
|
||||
|
||||
return aggregateStats;
|
||||
}
|
||||
}
|
||||
|
||||
public static class PerQueryStats
|
||||
{
|
||||
private final AtomicLong mergeBufferAcquisitionTimeNs = new AtomicLong(0);
|
||||
private final AtomicLong spilledBytes = new AtomicLong(0);
|
||||
private final AtomicLong mergeDictionarySize = new AtomicLong(0);
|
||||
|
||||
public void mergeBufferAcquisitionTime(long delay)
|
||||
{
|
||||
mergeBufferAcquisitionTimeNs.addAndGet(delay);
|
||||
}
|
||||
|
||||
public void spilledBytes(long bytes)
|
||||
{
|
||||
spilledBytes.addAndGet(bytes);
|
||||
}
|
||||
|
||||
public void dictionarySize(long size)
|
||||
{
|
||||
mergeDictionarySize.addAndGet(size);
|
||||
}
|
||||
|
||||
public long getMergeBufferAcquisitionTimeNs()
|
||||
{
|
||||
return mergeBufferAcquisitionTimeNs.get();
|
||||
}
|
||||
|
||||
public long getSpilledBytes()
|
||||
{
|
||||
return spilledBytes.get();
|
||||
}
|
||||
|
||||
public long getMergeDictionarySize()
|
||||
{
|
||||
return mergeDictionarySize.get();
|
||||
}
|
||||
}
|
||||
}
|
|
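GroupByStatsProvider's lifecycle mirrors how the rest of this diff threads PerQueryStats around: the toolchest fetches a container keyed by the query's resource id, the reservation pool, LimitedTemporaryStorage, and the groupers record merge-buffer wait time, spilled bytes, and dictionary sizes into it, and a Closer registered on the result sequence calls closeQuery(), which folds the container into the aggregate that getStatsSince() later returns and resets. The compact model below illustrates that flow with a String key in place of QueryResourceId; StatsLifecycleDemo is hypothetical and is not Druid code.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

// Hypothetical, simplified model of the GroupByStatsProvider lifecycle.
final class StatsLifecycleDemo
{
  static final class PerQueryStats
  {
    final AtomicLong mergeBufferAcquisitionTimeNs = new AtomicLong();
    final AtomicLong spilledBytes = new AtomicLong();
  }

  private final Map<String, PerQueryStats> perQuery = new ConcurrentHashMap<>();
  private long totalAcquisitionNs;
  private long totalSpilledBytes;

  PerQueryStats forQuery(String resourceId)
  {
    // one container per in-flight query, created lazily
    return perQuery.computeIfAbsent(resourceId, id -> new PerQueryStats());
  }

  synchronized void closeQuery(String resourceId)
  {
    PerQueryStats stats = perQuery.remove(resourceId);
    if (stats != null) {
      // fold the finished query into the running aggregate
      totalAcquisitionNs += stats.mergeBufferAcquisitionTimeNs.get();
      totalSpilledBytes += stats.spilledBytes.get();
    }
  }

  synchronized String snapshotAndReset()
  {
    String report = "acquisitionNs=" + totalAcquisitionNs + ", spilledBytes=" + totalSpilledBytes;
    totalAcquisitionNs = 0;
    totalSpilledBytes = 0;
    return report;
  }

  public static void main(String[] args)
  {
    StatsLifecycleDemo provider = new StatsLifecycleDemo();
    PerQueryStats stats = provider.forQuery("query-1");
    stats.mergeBufferAcquisitionTimeNs.addAndGet(2_000_000);  // recorded while reserving merge buffers
    stats.spilledBytes.addAndGet(4096);                        // recorded by LimitedTemporaryStorage
    provider.closeQuery("query-1");                            // registered on the result sequence's Closer
    System.out.println(provider.snapshotAndReset());           // emitted periodically by a monitor
  }
}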
@ -121,6 +121,7 @@ public class GroupingEngine
|
|||
private final ObjectMapper jsonMapper;
|
||||
private final ObjectMapper spillMapper;
|
||||
private final QueryWatcher queryWatcher;
|
||||
private final GroupByStatsProvider groupByStatsProvider;
|
||||
|
||||
@Inject
|
||||
public GroupingEngine(
|
||||
|
@ -129,7 +130,8 @@ public class GroupingEngine
|
|||
@Merging GroupByResourcesReservationPool groupByResourcesReservationPool,
|
||||
@Json ObjectMapper jsonMapper,
|
||||
@Smile ObjectMapper spillMapper,
|
||||
QueryWatcher queryWatcher
|
||||
QueryWatcher queryWatcher,
|
||||
GroupByStatsProvider groupByStatsProvider
|
||||
)
|
||||
{
|
||||
this.processingConfig = processingConfig;
|
||||
|
@ -138,6 +140,7 @@ public class GroupingEngine
|
|||
this.jsonMapper = jsonMapper;
|
||||
this.spillMapper = spillMapper;
|
||||
this.queryWatcher = queryWatcher;
|
||||
this.groupByStatsProvider = groupByStatsProvider;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -452,7 +455,8 @@ public class GroupingEngine
|
|||
processingConfig.getNumThreads(),
|
||||
processingConfig.intermediateComputeSizeBytes(),
|
||||
spillMapper,
|
||||
processingConfig.getTmpDir()
|
||||
processingConfig.getTmpDir(),
|
||||
groupByStatsProvider
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -587,7 +591,8 @@ public class GroupingEngine
|
|||
GroupByQuery query,
|
||||
GroupByQueryResources resource,
|
||||
Sequence<ResultRow> subqueryResult,
|
||||
boolean wasQueryPushedDown
|
||||
boolean wasQueryPushedDown,
|
||||
GroupByStatsProvider.PerQueryStats perQueryStats
|
||||
)
|
||||
{
|
||||
// Keep a reference to resultSupplier outside the "try" so we can close it if something goes wrong
|
||||
|
@ -614,7 +619,8 @@ public class GroupingEngine
|
|||
resource,
|
||||
spillMapper,
|
||||
processingConfig.getTmpDir(),
|
||||
processingConfig.intermediateComputeSizeBytes()
|
||||
processingConfig.intermediateComputeSizeBytes(),
|
||||
perQueryStats
|
||||
);
|
||||
|
||||
final GroupByRowProcessor.ResultSupplier finalResultSupplier = resultSupplier;
|
||||
|
@ -644,7 +650,8 @@ public class GroupingEngine
|
|||
public Sequence<ResultRow> processSubtotalsSpec(
|
||||
GroupByQuery query,
|
||||
GroupByQueryResources resource,
|
||||
Sequence<ResultRow> queryResult
|
||||
Sequence<ResultRow> queryResult,
|
||||
GroupByStatsProvider.PerQueryStats perQueryStats
|
||||
)
|
||||
{
|
||||
// How it works?
|
||||
|
@ -695,7 +702,8 @@ public class GroupingEngine
|
|||
resource,
|
||||
spillMapper,
|
||||
processingConfig.getTmpDir(),
|
||||
processingConfig.intermediateComputeSizeBytes()
|
||||
processingConfig.intermediateComputeSizeBytes(),
|
||||
perQueryStats
|
||||
);
|
||||
|
||||
List<String> queryDimNamesInOrder = baseSubtotalQuery.getDimensionNamesInOrder();
|
||||
|
@ -757,7 +765,8 @@ public class GroupingEngine
|
|||
resource,
|
||||
spillMapper,
|
||||
processingConfig.getTmpDir(),
|
||||
processingConfig.intermediateComputeSizeBytes()
|
||||
processingConfig.intermediateComputeSizeBytes(),
|
||||
perQueryStats
|
||||
);
|
||||
|
||||
subtotalsResults.add(
|
||||
|
|
|
@ -38,6 +38,7 @@ import org.apache.druid.query.QueryInterruptedException;
|
|||
import org.apache.druid.query.QueryTimeoutException;
|
||||
import org.apache.druid.query.aggregation.AggregatorFactory;
|
||||
import org.apache.druid.query.groupby.GroupByQueryConfig;
|
||||
import org.apache.druid.query.groupby.GroupByStatsProvider;
|
||||
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
|
||||
import org.apache.druid.segment.ColumnSelectorFactory;
|
||||
|
||||
|
@ -94,6 +95,7 @@ public class ConcurrentGrouper<KeyType> implements Grouper<KeyType>
|
|||
@Nullable
|
||||
private final ParallelCombiner<KeyType> parallelCombiner;
|
||||
private final boolean mergeThreadLocal;
|
||||
private final GroupByStatsProvider.PerQueryStats perQueryStats;
|
||||
|
||||
private volatile boolean initialized = false;
|
||||
|
||||
|
@ -113,7 +115,8 @@ public class ConcurrentGrouper<KeyType> implements Grouper<KeyType>
|
|||
final ListeningExecutorService executor,
|
||||
final int priority,
|
||||
final boolean hasQueryTimeout,
|
||||
final long queryTimeoutAt
|
||||
final long queryTimeoutAt,
|
||||
final GroupByStatsProvider.PerQueryStats perQueryStats
|
||||
)
|
||||
{
|
||||
this(
|
||||
|
@ -137,7 +140,8 @@ public class ConcurrentGrouper<KeyType> implements Grouper<KeyType>
|
|||
queryTimeoutAt,
|
||||
groupByQueryConfig.getIntermediateCombineDegree(),
|
||||
groupByQueryConfig.getNumParallelCombineThreads(),
|
||||
groupByQueryConfig.isMergeThreadLocal()
|
||||
groupByQueryConfig.isMergeThreadLocal(),
|
||||
perQueryStats
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -162,7 +166,8 @@ public class ConcurrentGrouper<KeyType> implements Grouper<KeyType>
|
|||
final long queryTimeoutAt,
|
||||
final int intermediateCombineDegree,
|
||||
final int numParallelCombineThreads,
|
||||
final boolean mergeThreadLocal
|
||||
final boolean mergeThreadLocal,
|
||||
final GroupByStatsProvider.PerQueryStats perQueryStats
|
||||
)
|
||||
{
|
||||
Preconditions.checkArgument(concurrencyHint > 0, "concurrencyHint > 0");
|
||||
|
@ -212,6 +217,7 @@ public class ConcurrentGrouper<KeyType> implements Grouper<KeyType>
|
|||
}
|
||||
|
||||
this.mergeThreadLocal = mergeThreadLocal;
|
||||
this.perQueryStats = perQueryStats;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -238,7 +244,8 @@ public class ConcurrentGrouper<KeyType> implements Grouper<KeyType>
|
|||
false,
|
||||
limitSpec,
|
||||
sortHasNonGroupingFields,
|
||||
sliceSize
|
||||
sliceSize,
|
||||
perQueryStats
|
||||
);
|
||||
grouper.init();
|
||||
groupers.add(grouper);
|
||||
|
|
|
@@ -59,6 +59,7 @@ import org.apache.druid.query.groupby.GroupByQuery;
 import org.apache.druid.query.groupby.GroupByQueryConfig;
 import org.apache.druid.query.groupby.GroupByQueryResources;
 import org.apache.druid.query.groupby.GroupByResourcesReservationPool;
+import org.apache.druid.query.groupby.GroupByStatsProvider;
 import org.apache.druid.query.groupby.ResultRow;
 import org.apache.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey;

@@ -103,6 +104,7 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
   private final ObjectMapper spillMapper;
   private final String processingTmpDir;
   private final int mergeBufferSize;
+  private final GroupByStatsProvider groupByStatsProvider;

   public GroupByMergingQueryRunner(
       GroupByQueryConfig config,
@@ -114,7 +116,8 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
       int concurrencyHint,
       int mergeBufferSize,
       ObjectMapper spillMapper,
-      String processingTmpDir
+      String processingTmpDir,
+      GroupByStatsProvider groupByStatsProvider
   )
   {
     this.config = config;
@@ -127,6 +130,7 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
     this.spillMapper = spillMapper;
     this.processingTmpDir = processingTmpDir;
     this.mergeBufferSize = mergeBufferSize;
+    this.groupByStatsProvider = groupByStatsProvider;
   }

   @Override
@@ -163,6 +167,9 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
         StringUtils.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId())
     );

+    GroupByStatsProvider.PerQueryStats perQueryStats =
+        groupByStatsProvider.getPerQueryStatsContainer(query.context().getQueryResourceId());
+
     final int priority = queryContext.getPriority();

     // Figure out timeoutAt time now, so we can apply the timeout to both the mergeBufferPool.take and the actual
@@ -182,8 +189,10 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
       try {
         final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(
             temporaryStorageDirectory,
-            querySpecificConfig.getMaxOnDiskStorage().getBytes()
+            querySpecificConfig.getMaxOnDiskStorage().getBytes(),
+            perQueryStats
         );
+
         final ReferenceCountingResourceHolder<LimitedTemporaryStorage> temporaryStorageHolder =
             ReferenceCountingResourceHolder.fromCloseable(temporaryStorage);
         resources.register(temporaryStorageHolder);
@@ -215,7 +224,8 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
             priority,
             hasTimeout,
             timeoutAt,
-            mergeBufferSize
+            mergeBufferSize,
+            perQueryStats
         );
         final Grouper<RowBasedKey> grouper = pair.lhs;
         final Accumulator<AggregateResult, ResultRow> accumulator = pair.rhs;
@@ -318,8 +328,8 @@ public class GroupByMergingQueryRunner implements QueryRunner<ResultRow>
     GroupByQueryResources resource = groupByResourcesReservationPool.fetch(queryResourceId);
     if (resource == null) {
       throw DruidException.defensive(
-          "Expected merge buffers to be reserved in the reservation pool for the query id [%s] however while executing "
-          + "the GroupByMergingQueryRunner, however none were provided.",
+          "Expected merge buffers to be reserved in the reservation pool for the query resource id [%s] however while executing "
+          + "the GroupByMergingQueryRunner none were provided.",
          queryResourceId
       );
     }

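For readers following the stats wiring above, here is a minimal sketch (not part of the diff) of how the per-query stats container is obtained and threaded downward; any names not visible in the hunks are hypothetical stand-ins.

    // Sketch only: assumes a GroupByStatsProvider injected into the runner, as this diff does.
    GroupByStatsProvider.PerQueryStats perQueryStats =
        groupByStatsProvider.getPerQueryStatsContainer(query.context().getQueryResourceId());

    // The same container is then handed to the temporary spill storage and to the groupers,
    // so spilled bytes and dictionary sizes are attributed to this query's resource id.
    LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(
        temporaryStorageDirectory,                             // hypothetical spill directory
        querySpecificConfig.getMaxOnDiskStorage().getBytes(),  // on-disk spill limit
        perQueryStats
    );
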
@@ -879,6 +879,12 @@ public class GroupByQueryEngine
       return ImmutableList.of();
     }

+    @Override
+    public Long getDictionarySize()
+    {
+      return 0L;
+    }
+
     @Override
     public ByteBuffer createKey()
     {

@@ -34,6 +34,7 @@ import org.apache.druid.query.dimension.DimensionSpec;
 import org.apache.druid.query.groupby.GroupByQuery;
 import org.apache.druid.query.groupby.GroupByQueryConfig;
 import org.apache.druid.query.groupby.GroupByQueryResources;
+import org.apache.druid.query.groupby.GroupByStatsProvider;
 import org.apache.druid.query.groupby.ResultRow;
 import org.apache.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey;

@@ -93,7 +94,8 @@ public class GroupByRowProcessor
       final GroupByQueryResources resource,
       final ObjectMapper spillMapper,
       final String processingTmpDir,
-      final int mergeBufferSize
+      final int mergeBufferSize,
+      final GroupByStatsProvider.PerQueryStats perQueryStats
   )
   {
     final Closer closeOnExit = Closer.create();
@@ -106,7 +108,8 @@ public class GroupByRowProcessor

     final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(
         temporaryStorageDirectory,
-        querySpecificConfig.getMaxOnDiskStorage().getBytes()
+        querySpecificConfig.getMaxOnDiskStorage().getBytes(),
+        perQueryStats
     );

     closeOnExit.register(temporaryStorage);
@@ -128,7 +131,8 @@ public class GroupByRowProcessor
         },
         temporaryStorage,
         spillMapper,
-        mergeBufferSize
+        mergeBufferSize,
+        perQueryStats
     );
     final Grouper<RowBasedKey> grouper = pair.lhs;
     final Accumulator<AggregateResult, ResultRow> accumulator = pair.rhs;

@@ -184,6 +184,11 @@ public interface Grouper<KeyType> extends Closeable
      */
    List<String> getDictionary();

+    /**
+     * Return the estimated size of the dictionary of this KeySerde.
+     */
+    Long getDictionarySize();
+
    /**
     * Serialize a key. This will be called by the {@link #aggregate(Object)} method. The buffer will not
     * be retained after the aggregate method returns, so reusing buffers is OK.

@@ -25,6 +25,7 @@ import org.apache.druid.java.util.common.FileUtils;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.query.groupby.GroupByStatsProvider;

 import java.io.Closeable;
 import java.io.File;
@@ -47,6 +48,8 @@ public class LimitedTemporaryStorage implements Closeable
 {
   private static final Logger log = new Logger(LimitedTemporaryStorage.class);

+  private final GroupByStatsProvider.PerQueryStats perQueryStatsContainer;
+
   private final File storageDirectory;
   private final long maxBytesUsed;

@@ -57,10 +60,15 @@ public class LimitedTemporaryStorage implements Closeable

   private boolean createdStorageDirectory = false;

-  public LimitedTemporaryStorage(File storageDirectory, long maxBytesUsed)
+  public LimitedTemporaryStorage(
+      File storageDirectory,
+      long maxBytesUsed,
+      GroupByStatsProvider.PerQueryStats perQueryStatsContainer
+  )
   {
     this.storageDirectory = storageDirectory;
     this.maxBytesUsed = maxBytesUsed;
+    this.perQueryStatsContainer = perQueryStatsContainer;
   }

   /**
@@ -121,7 +129,7 @@ public class LimitedTemporaryStorage implements Closeable
   }

   @VisibleForTesting
-  long currentSize()
+  public long currentSize()
   {
     return bytesUsed.get();
   }
@@ -134,6 +142,11 @@ public class LimitedTemporaryStorage implements Closeable
       return;
     }
     closed = true;
+
+    perQueryStatsContainer.spilledBytes(bytesUsed.get());
+
+    bytesUsed.set(0);
+
     for (File file : ImmutableSet.copyOf(files)) {
       delete(file);
     }
@@ -199,6 +212,5 @@ public class LimitedTemporaryStorage implements Closeable
         throw new TemporaryStorageFullException(maxBytesUsed);
       }
     }
-
   }
 }

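A hedged usage sketch of the widened LimitedTemporaryStorage API (not part of the diff); only the constructor, currentSize(), and close() behavior shown in the hunks above are assumed, and spillDir / perQueryStats are hypothetical stand-ins.

    // Sketch only.
    LimitedTemporaryStorage storage = new LimitedTemporaryStorage(
        spillDir,            // java.io.File used for spill files
        10_000_000L,         // maxBytesUsed: spill budget before TemporaryStorageFullException
        perQueryStats        // GroupByStatsProvider.PerQueryStats container for this query
    );
    try {
      // ... groupers spill intermediate results into this storage ...
      long spilledSoFar = storage.currentSize();  // accessor made public by this diff
    }
    finally {
      storage.close();  // reports bytesUsed to perQueryStats, resets the counter, deletes spill files
    }
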
@@ -58,6 +58,7 @@ import org.apache.druid.query.filter.Filter;
 import org.apache.druid.query.filter.ValueMatcher;
 import org.apache.druid.query.groupby.GroupByQuery;
 import org.apache.druid.query.groupby.GroupByQueryConfig;
+import org.apache.druid.query.groupby.GroupByStatsProvider;
 import org.apache.druid.query.groupby.ResultRow;
 import org.apache.druid.query.groupby.epinephelinae.Grouper.BufferComparator;
 import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
@@ -131,7 +132,8 @@ public class RowBasedGrouperHelper
       final Supplier<ByteBuffer> bufferSupplier,
       final LimitedTemporaryStorage temporaryStorage,
       final ObjectMapper spillMapper,
-      final int mergeBufferSize
+      final int mergeBufferSize,
+      final GroupByStatsProvider.PerQueryStats perQueryStats
   )
   {
     return createGrouperAccumulatorPair(
@@ -148,7 +150,8 @@ public class RowBasedGrouperHelper
         UNKNOWN_THREAD_PRIORITY,
         false,
         UNKNOWN_TIMEOUT,
-        mergeBufferSize
+        mergeBufferSize,
+        perQueryStats
     );
   }

@@ -197,7 +200,8 @@ public class RowBasedGrouperHelper
       final int priority,
       final boolean hasQueryTimeout,
       final long queryTimeoutAt,
-      final int mergeBufferSize
+      final int mergeBufferSize,
+      final GroupByStatsProvider.PerQueryStats perQueryStats
   )
   {
     // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
@@ -276,7 +280,8 @@ public class RowBasedGrouperHelper
           true,
           limitSpec,
           sortHasNonGroupingFields,
-          mergeBufferSize
+          mergeBufferSize,
+          perQueryStats
       );
     } else {
       final Grouper.KeySerdeFactory<RowBasedKey> combineKeySerdeFactory = new RowBasedKeySerdeFactory(
@@ -305,7 +310,8 @@ public class RowBasedGrouperHelper
           grouperSorter,
           priority,
           hasQueryTimeout,
-          queryTimeoutAt
+          queryTimeoutAt,
+          perQueryStats
       );
     }

@@ -1282,6 +1288,12 @@ public class RowBasedGrouperHelper
       return dictionary;
     }

+    @Override
+    public Long getDictionarySize()
+    {
+      return currentEstimatedSize;
+    }
+
    @Override
    public ByteBuffer toByteBuffer(RowBasedKey key)
    {

@@ -37,6 +37,7 @@ import org.apache.druid.java.util.common.parsers.CloseableIterator;
 import org.apache.druid.query.BaseQuery;
 import org.apache.druid.query.aggregation.AggregatorAdapters;
 import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.groupby.GroupByStatsProvider;
 import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
 import org.apache.druid.segment.ColumnSelectorFactory;

@@ -74,6 +75,7 @@ public class SpillingGrouper<KeyType> implements Grouper<KeyType>
   private final AggregatorFactory[] aggregatorFactories;
   private final Comparator<Grouper.Entry<KeyType>> keyObjComparator;
   private final Comparator<Grouper.Entry<KeyType>> defaultOrderKeyObjComparator;
+  private final GroupByStatsProvider.PerQueryStats perQueryStats;

   private final List<File> files = new ArrayList<>();
   private final List<File> dictionaryFiles = new ArrayList<>();
@@ -95,7 +97,8 @@ public class SpillingGrouper<KeyType> implements Grouper<KeyType>
       final boolean spillingAllowed,
       final DefaultLimitSpec limitSpec,
       final boolean sortHasNonGroupingFields,
-      final int mergeBufferSize
+      final int mergeBufferSize,
+      final GroupByStatsProvider.PerQueryStats perQueryStats
   )
   {
     this.keySerde = keySerdeFactory.factorize();
@@ -155,6 +158,7 @@ public class SpillingGrouper<KeyType> implements Grouper<KeyType>
     this.spillMapper = keySerde.decorateObjectMapper(spillMapper);
     this.spillingAllowed = spillingAllowed;
     this.sortHasNonGroupingFields = sortHasNonGroupingFields;
+    this.perQueryStats = perQueryStats;
   }

   @Override
@@ -214,6 +218,7 @@ public class SpillingGrouper<KeyType> implements Grouper<KeyType>
   @Override
   public void close()
   {
+    perQueryStats.dictionarySize(keySerde.getDictionarySize());
     grouper.close();
     keySerde.reset();
     deleteFiles();

@@ -112,12 +112,8 @@ public class CursorFactoryRowsAndColumns implements CloseableShapeshifter, RowsAndColumns
         cursor.advance();
       }

-      if (writer == null) {
-        return new EmptyRowsAndColumns();
-      } else {
-        final byte[] bytes = writer.toByteArray();
-        return new ColumnBasedFrameRowsAndColumns(Frame.wrap(bytes), rowSignature);
-      }
+      final byte[] bytes = writer.toByteArray();
+      return new ColumnBasedFrameRowsAndColumns(Frame.wrap(bytes), rowSignature);
     }
   }
 }

@@ -185,14 +185,13 @@ public class TimeseriesQuery extends BaseQuery<Result<TimeseriesResultValue>>
   @Override
   public RowSignature getResultRowSignature(Finalization finalization)
   {
-    final Finalization finalization1 = finalization;
     final RowSignature.Builder builder = RowSignature.builder();
     builder.addTimeColumn();
     String timestampResultField = getTimestampResultField();
     if (StringUtils.isNotEmpty(timestampResultField)) {
       builder.add(timestampResultField, ColumnType.LONG);
     }
-    builder.addAggregators(aggregatorSpecs, finalization1);
+    builder.addAggregators(aggregatorSpecs, finalization);
     builder.addPostAggregators(postAggregatorSpecs);
     return builder.build();
   }

@@ -96,6 +96,7 @@ public class UnionQueryLogic implements QueryLogic
       Sequence run = runner.run(queryPlus.withQuery(q), responseContext);
       seqs.add(run);
     }
+
     return Sequences.concat(seqs);
   }
 }

@@ -29,7 +29,7 @@ import com.google.common.collect.Interner;
 import com.google.common.collect.Interners;
 import com.google.common.collect.Lists;
 import org.apache.druid.data.input.impl.AggregateProjectionSpec;
-import org.apache.druid.error.InvalidInput;
+import org.apache.druid.error.DruidException;
 import org.apache.druid.java.util.common.granularity.Granularities;
 import org.apache.druid.java.util.common.granularity.Granularity;
 import org.apache.druid.query.OrderBy;
@@ -40,6 +40,7 @@ import org.apache.druid.utils.CollectionUtils;

 import javax.annotation.Nullable;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Objects;
@@ -167,17 +168,17 @@ public class AggregateProjectionMetadata
       @JsonProperty("name") String name,
       @JsonProperty("timeColumnName") @Nullable String timeColumnName,
       @JsonProperty("virtualColumns") @Nullable VirtualColumns virtualColumns,
-      @JsonProperty("groupingColumns") List<String> groupingColumns,
+      @JsonProperty("groupingColumns") @Nullable List<String> groupingColumns,
       @JsonProperty("aggregators") @Nullable AggregatorFactory[] aggregators,
       @JsonProperty("ordering") List<OrderBy> ordering
   )
   {
     this.name = name;
-    if (CollectionUtils.isNullOrEmpty(groupingColumns)) {
-      throw InvalidInput.exception("groupingColumns must not be null or empty");
+    if (CollectionUtils.isNullOrEmpty(groupingColumns) && (aggregators == null || aggregators.length == 0)) {
+      throw DruidException.defensive("groupingColumns and aggregators must not both be null or empty");
     }
     this.virtualColumns = virtualColumns == null ? VirtualColumns.EMPTY : virtualColumns;
-    this.groupingColumns = groupingColumns;
+    this.groupingColumns = groupingColumns == null ? Collections.emptyList() : groupingColumns;
     this.aggregators = aggregators == null ? new AggregatorFactory[0] : aggregators;
     this.ordering = ordering;

@@ -456,7 +456,8 @@ public class IndexIO
         new StringUtf8DictionaryEncodedColumnSupplier<>(
             index.getDimValueUtf8Lookup(dimension)::singleThreaded,
             null,
-            Suppliers.ofInstance(index.getDimColumn(dimension))
+            Suppliers.ofInstance(index.getDimColumn(dimension)),
+            LEGACY_FACTORY.getBitmapFactory()
         )
     );
     GenericIndexed<ImmutableBitmap> bitmaps = index.getBitmapIndexes().get(dimension);

@@ -43,7 +43,7 @@ import java.util.Objects;
  */
 public class IndexSpec
 {
-  public static IndexSpec DEFAULT = IndexSpec.builder().build();
+  public static final IndexSpec DEFAULT = IndexSpec.builder().build();

   public static Builder builder()
   {

@@ -20,6 +20,7 @@
 package org.apache.druid.segment.column;

 import com.google.common.collect.Lists;
+import org.apache.druid.collections.bitmap.BitmapFactory;
 import org.apache.druid.common.semantic.SemanticUtils;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.query.extraction.ExtractionFn;
@@ -73,16 +74,19 @@ public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColumn
   @Nullable
   private final ColumnarMultiInts multiValueColumn;
   private final Indexed<ByteBuffer> utf8Dictionary;
+  private final BitmapFactory bitmapFactory;

   public StringUtf8DictionaryEncodedColumn(
       @Nullable ColumnarInts singleValueColumn,
       @Nullable ColumnarMultiInts multiValueColumn,
-      Indexed<ByteBuffer> utf8Dictionary
+      Indexed<ByteBuffer> utf8Dictionary,
+      BitmapFactory bitmapFactory
   )
   {
     this.column = singleValueColumn;
     this.multiValueColumn = multiValueColumn;
     this.utf8Dictionary = utf8Dictionary;
+    this.bitmapFactory = bitmapFactory;
   }

   @Override
@@ -135,6 +139,11 @@ public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColumn
     return utf8Dictionary.size();
   }

+  public BitmapFactory getBitmapFactory()
+  {
+    return bitmapFactory;
+  }
+
   @Override
   public HistoricalDimensionSelector makeDimensionSelector(
       final ReadableOffset offset,

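A minimal construction sketch (not part of the diff) for the widened StringUtf8DictionaryEncodedColumn constructor; singleValueColumn, utf8Dictionary, and bitmapFactory are hypothetical stand-ins for real column parts. The only additions confirmed by the hunks are the BitmapFactory parameter and the getBitmapFactory() accessor; the IndexIO hunk above shows a production call site passing LEGACY_FACTORY.getBitmapFactory() for legacy segments.

    // Sketch only: wiring a single-valued column.
    StringUtf8DictionaryEncodedColumn column = new StringUtf8DictionaryEncodedColumn(
        singleValueColumn,   // @Nullable ColumnarInts
        null,                // @Nullable ColumnarMultiInts (null for a single-valued column)
        utf8Dictionary,      // Indexed<ByteBuffer> UTF-8 value dictionary
        bitmapFactory        // new: BitmapFactory of the owning segment
    );
    BitmapFactory factory = column.getBitmapFactory();  // new accessor added by this diff
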
@@ -36,6 +36,7 @@ public class BlockLayoutColumnarDoublesSupplier implements Supplier<ColumnarDoubles>

   // The number of doubles per buffer.
   private final int sizePer;
+  private final CompressionStrategy strategy;

   public BlockLayoutColumnarDoublesSupplier(
       int totalSize,
@@ -45,7 +46,8 @@ public class BlockLayoutColumnarDoublesSupplier implements Supplier<ColumnarDoubles>
       CompressionStrategy strategy
   )
   {
-    baseDoubleBuffers = GenericIndexed.read(fromBuffer, DecompressingByteBufferObjectStrategy.of(byteOrder, strategy));
+    this.strategy = strategy;
+    this.baseDoubleBuffers = GenericIndexed.read(fromBuffer, DecompressingByteBufferObjectStrategy.of(byteOrder, strategy));
     this.totalSize = totalSize;
     this.sizePer = sizePer;
   }
@@ -78,7 +80,8 @@ public class BlockLayoutColumnarDoublesSupplier implements Supplier<ColumnarDoubles>
     }
   }

-  private class BlockLayoutColumnarDoubles implements ColumnarDoubles
+  // This needs to be a public class so that SemanticCreator is able to call it.
+  public class BlockLayoutColumnarDoubles implements ColumnarDoubles
   {
     final Indexed<ResourceHolder<ByteBuffer>> singleThreadedDoubleBuffers = baseDoubleBuffers.singleThreaded();

@@ -91,6 +94,11 @@ public class BlockLayoutColumnarDoublesSupplier implements Supplier<ColumnarDoubles>
     @Nullable
     DoubleBuffer doubleBuffer;

+    public CompressionStrategy getCompressionStrategy()
+    {
+      return strategy;
+    }
+
     @Override
     public int size()
     {

@@ -43,6 +43,7 @@ public class BlockLayoutColumnarLongsSupplier implements Supplier<ColumnarLongs>
   // The number of longs per buffer.
   private final int sizePer;
   private final CompressionFactory.LongEncodingReader baseReader;
+  private final CompressionStrategy strategy;

   public BlockLayoutColumnarLongsSupplier(
       int totalSize,
@@ -53,6 +54,7 @@ public class BlockLayoutColumnarLongsSupplier implements Supplier<ColumnarLongs>
       CompressionStrategy strategy
   )
   {
+    this.strategy = strategy;
     this.baseLongBuffers = GenericIndexed.read(fromBuffer, DecompressingByteBufferObjectStrategy.of(order, strategy));
     this.totalSize = totalSize;
     this.sizePer = sizePer;
@@ -124,7 +126,8 @@ public class BlockLayoutColumnarLongsSupplier implements Supplier<ColumnarLongs>
     }
   }

-  private class BlockLayoutColumnarLongs implements ColumnarLongs
+  // This needs to be a public class so that SemanticCreator is able to call it.
+  public class BlockLayoutColumnarLongs implements ColumnarLongs
   {
     final CompressionFactory.LongEncodingReader reader = baseReader.duplicate();
     final Indexed<ResourceHolder<ByteBuffer>> singleThreadedLongBuffers = baseLongBuffers.singleThreaded();
@@ -140,6 +143,16 @@ public class BlockLayoutColumnarLongsSupplier implements Supplier<ColumnarLongs>
     @Nullable
     LongBuffer longBuffer;

+    public CompressionFactory.LongEncodingStrategy getEncodingStrategy()
+    {
+      return baseReader.getStrategy();
+    }
+
+    public CompressionStrategy getCompressionStrategy()
+    {
+      return strategy;
+    }
+
     @Override
     public int size()
     {

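A hedged, cast-based illustration (not part of the diff) of reaching the accessors now exposed on the public BlockLayoutColumnarLongs inner class; in Druid these are presumably reached through the SemanticCreator mechanism referenced in the comment above, so the instanceof check here is purely for demonstration, and `supplier` is a hypothetical BlockLayoutColumnarLongsSupplier built elsewhere.

    // Sketch only.
    ColumnarLongs longs = supplier.get();
    if (longs instanceof BlockLayoutColumnarLongsSupplier.BlockLayoutColumnarLongs) {
      BlockLayoutColumnarLongsSupplier.BlockLayoutColumnarLongs block =
          (BlockLayoutColumnarLongsSupplier.BlockLayoutColumnarLongs) longs;
      CompressionStrategy compression = block.getCompressionStrategy();                // new accessor
      CompressionFactory.LongEncodingStrategy encoding = block.getEncodingStrategy();  // new accessor
    }
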
@@ -308,7 +308,7 @@ public class CompressedVSizeColumnarIntsSupplier implements WritableSupplier<ColumnarInts>
     }
   }

-  private class CompressedVSizeColumnarInts implements ColumnarInts
+  public class CompressedVSizeColumnarInts implements ColumnarInts
   {
     final Indexed<ResourceHolder<ByteBuffer>> singleThreadedBuffers = baseBuffers.singleThreaded();

@@ -329,6 +329,11 @@ public class CompressedVSizeColumnarIntsSupplier implements WritableSupplier<ColumnarInts>
       return totalSize;
     }

+    public CompressionStrategy getCompressionStrategy()
+    {
+      return compression;
+    }
+
     /**
      * Returns the value at the given index into the column.
      * <p/>

Some files were not shown because too many files have changed in this diff.