# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

language: java

sudo: true
dist: xenial

jdk:
  - openjdk8

cache:
  directories:
    - $HOME/.m2

env:
  global:
    - DOCKER_IP=127.0.0.1  # for integration tests
    - MVN="mvn -B"
    - >  # Various options to make execution of maven goals faster (e.g., mvn install)
      MAVEN_SKIP="
      -Danimal.sniffer.skip=true
      -Dcheckstyle.skip=true
      -Ddruid.console.skip=true
      -Denforcer.skip=true
      -Dforbiddenapis.skip=true
      -Dmaven.javadoc.skip=true
      -Dpmd.skip=true
      -Dspotbugs.skip=true
      "
    - MAVEN_SKIP_TESTS="-DskipTests -Djacoco.skip=true"

# Add various options to make 'mvn install' fast and skip javascript compile (-Ddruid.console.skip=true) since it is not
# needed. Depending on network speeds, "mvn -q install" may take longer than the default 10 minute timeout to print any
# output. To compensate, use travis_wait to extend the timeout.
install: MAVEN_OPTS='-Xmx3000m' travis_wait 15 ${MVN} clean install -q -ff ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} -T1C

jobs:
  include:
    - name: "animal sniffer checks"
      script: ${MVN} animal-sniffer:check --fail-at-end

    - name: "checkstyle"
      script: ${MVN} checkstyle:checkstyle --fail-at-end

    - name: "enforcer checks"
      script: ${MVN} enforcer:enforce --fail-at-end

    - name: "forbidden api checks"
      script: ${MVN} forbiddenapis:check forbiddenapis:testCheck --fail-at-end

    - name: "pmd checks"
      script: ${MVN} pmd:check --fail-at-end  # TODO: consider adding pmd:cpd-check

    - name: "spotbugs checks"
      script: ${MVN} spotbugs:check --fail-at-end -pl '!benchmarks'

    - name: "license checks"
      install: skip
      before_script: &setup_generate_license
        - sudo apt-get update && sudo apt-get install python3 python3-pip python3-setuptools -y
        - pip3 install wheel  # install wheel first explicitly
        - pip3 install pyyaml
      script:
        - >
          ${MVN} apache-rat:check -Prat --fail-at-end
          -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn
          -Drat.consoleOutput=true
        # Generate dependency reports and checks they are valid. When running on Travis CI, 2 cores are available
        # (https://docs.travis-ci.com/user/reference/overview/#virtualisation-environment-vs-operating-system).
        - mkdir -p target
        - distribution/bin/generate-license-dependency-reports.py . target --clean-maven-artifact-transfer --parallel 2
        - distribution/bin/check-licenses.py licenses.yaml target/license-reports

    - &compile_strict
      name: "(openjdk8) strict compilation"
      install: skip
      # Strict compilation requires more than 2 GB
      script: >
        MAVEN_OPTS='-Xmx3000m' ${MVN} clean -Pstrict compile test-compile --fail-at-end
        -pl '!benchmarks' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS}

    - name: "analyze dependencies"
      script: MAVEN_OPTS='-Xmx3000m' ${MVN} ${MAVEN_SKIP} dependency:analyze -DoutputXML=true -DignoreNonCompile=true -DfailOnWarning=true
      after_failure: |-
        echo "FAILURE EXPLANATION:

        The dependency analysis has found a dependency that is either:

        1) Used and undeclared: These are available as a transitive dependency but should be explicitly
        added to the POM to ensure the dependency version. The XML to add the dependencies to the POM is
        shown above.

        2) Unused and declared: These are not needed and removing them from the POM will speed up the build
        and reduce the artifact size. The dependencies to remove are shown above.

        If there are false positive dependency analysis warnings, they can be suppressed:
        https://maven.apache.org/plugins/maven-dependency-plugin/analyze-mojo.html#usedDependencies
        https://maven.apache.org/plugins/maven-dependency-plugin/examples/exclude-dependencies-from-dependency-analysis.html

        For more information, refer to:
        https://maven.apache.org/plugins/maven-dependency-plugin/analyze-mojo.html
        "

    - name: "security vulnerabilities"
      install: skip
      script: ${MVN} dependency-check:check
      after_failure: |-
        echo "FAILURE EXPLANATION:

        The OWASP dependency check has found security vulnerabilities. Please use a newer version
        of the dependency that does not have vulnerabilities. If the analysis has false positives,
        they can be suppressed by adding entries to owasp-dependency-check-suppressions.xml (for more
        information, see https://jeremylong.github.io/DependencyCheck/general/suppression.html).
        "

    - &package
      name: "(openjdk8) packaging check"
      install: skip
      before_script: *setup_generate_license
      script: >
        MAVEN_OPTS='-Xmx3000m' ${MVN} clean install -Pdist -Pbundle-contrib-exts --fail-at-end
        -pl '!benchmarks' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} -Ddruid.console.skip=false -T1C

    - <<: *package
      name: "(openjdk11) packaging check"
      jdk: openjdk11

    - &test_processing_module
      name: "(openjdk8) processing module test"
      env: &processing_env
        - MAVEN_PROJECTS='processing'
      before_script: &setup_java_test
        - unset _JAVA_OPTIONS
      script: &run_java_test
        # Set MAVEN_OPTS for Surefire launcher. Skip remoteresources to avoid intermittent connection timeouts when
        # resolving the SIGAR dependency.
        - >
          MAVEN_OPTS='-Xmx800m' ${MVN} test -pl ${MAVEN_PROJECTS}
          ${MAVEN_SKIP} -Dremoteresources.skip=true
        - sh -c "dmesg | egrep -i '(oom|out of memory|kill process|killed).*' -C 1 || exit 0"
        - free -m
      after_success: &upload_java_unit_test_coverage
        - ${MVN} -pl ${MAVEN_PROJECTS} jacoco:report
        # retry in case of network error
        - travis_retry curl -o codecov.sh -s https://codecov.io/bash
        - travis_retry bash codecov.sh -X gcov

    - <<: *test_processing_module
      name: "(openjdk11) processing module test"
      jdk: openjdk11

    - &test_processing_module_sqlcompat
      name: "(openjdk8) processing module test (SQL Compatibility)"
      env: *processing_env
      before_script: *setup_java_test
      script: &run_java_sql_compat_test
        # Set MAVEN_OPTS for Surefire launcher. Skip remoteresources to avoid intermittent connection timeouts when
        # resolving the SIGAR dependency.
        - >
          MAVEN_OPTS='-Xmx800m' ${MVN} test -pl ${MAVEN_PROJECTS} -Ddruid.generic.useDefaultValueForNull=false
          ${MAVEN_SKIP} -Dremoteresources.skip=true
        - sh -c "dmesg | egrep -i '(oom|out of memory|kill process|killed).*' -C 1 || exit 0"
        - free -m
      after_success: *upload_java_unit_test_coverage

    - <<: *test_processing_module_sqlcompat
      name: "(openjdk11) processing module test (SQL Compatibility)"
      jdk: openjdk11

    - &test_indexing_module
      name: "(openjdk8) indexing modules test"
      env: &indexing_env
        - MAVEN_PROJECTS='indexing-hadoop,indexing-service,extensions-core/kafka-indexing-service,extensions-core/kinesis-indexing-service'
      before_script: *setup_java_test
      script: *run_java_test
      after_success: *upload_java_unit_test_coverage

    - <<: *test_indexing_module
      name: "(openjdk11) indexing modules test"
      jdk: openjdk11

    - &test_indexing_module_sqlcompat
      name: "(openjdk8) indexing modules test (SQL Compatibility)"
      env: *indexing_env
      before_script: *setup_java_test
      script: *run_java_sql_compat_test
      after_success: *upload_java_unit_test_coverage

    - <<: *test_indexing_module_sqlcompat
      name: "(openjdk11) indexing modules test (SQL Compatibility)"
      jdk: openjdk11

    - &test_server_module
      name: "(openjdk8) server module test"
      env: &server_env
        - MAVEN_PROJECTS='server'
      before_script: *setup_java_test
      script: *run_java_test
      after_success: *upload_java_unit_test_coverage

    - <<: *test_server_module
      name: "(openjdk11) server module test"
      jdk: openjdk11

    - &test_server_module_sqlcompat
      name: "(openjdk8) server module test (SQL Compatibility)"
      env: *server_env
      before_script: *setup_java_test
      script: *run_java_sql_compat_test
      after_success: *upload_java_unit_test_coverage

    - <<: *test_server_module_sqlcompat
      name: "(openjdk11) server module test (SQL Compatibility)"
      jdk: openjdk11

    - &test_modules
      name: "(openjdk8) other modules test"
      env: &other_env
        - MAVEN_PROJECTS='!processing,!indexing-hadoop,!indexing-service,!extensions-core/kafka-indexing-service,!extensions-core/kinesis-indexing-service,!server,!web-console'
      before_script: *setup_java_test
      script: *run_java_test
      after_success: *upload_java_unit_test_coverage

    - <<: *test_modules
      name: "(openjdk11) other modules test"
      jdk: openjdk11

    - &test_modules_sqlcompat
      name: "(openjdk8) other modules test (SQL Compatibility)"
      env: *other_env
      before_script: *setup_java_test
      script: *run_java_sql_compat_test
      after_success: *upload_java_unit_test_coverage

    - <<: *test_modules_sqlcompat
      name: "(openjdk11) other modules test (SQL Compatibility)"
      jdk: openjdk11

    - &test_webconsole
      name: "web console"
      install: skip
      script:
        - ${MVN} test -pl 'web-console'
      after_success:
        - (cd web-console && travis_retry npm run codecov)  # retry in case of network error

    - name: "docs"
      install: (cd website && npm install)
      script: (cd website && npm run lint && npm run spellcheck)
      after_failure: |-
        echo "FAILURE EXPLANATION:

        If there are spell check errors:

        1) Suppressing False Positives: Edit website/.spelling to add suppressions. Instructions
        are at the top of the file and explain how to suppress false positives either globally or
        within a particular file.

        2) Running Spell Check Locally: cd website && npm install && npm run spellcheck

        For more information, refer to: https://www.npmjs.com/package/markdown-spellcheck
        "

    - &integration_batch_index
      name: "batch index integration test"
      services: &integration_test_services
        - docker
      env: TESTNG_GROUPS='-Dgroups=batch-index'
      script: &run_integration_test
        - ${MVN} verify -pl integration-tests -P integration-tests ${TESTNG_GROUPS} ${MAVEN_SKIP}
      after_failure: &integration_test_diags
        - for v in ~/shared/logs/*.log ; do
          echo $v logtail ======================== ; tail -100 $v ;
          done
        - for v in broker middlemanager overlord router coordinator historical ; do
          echo $v dmesg ======================== ;
          docker exec -it druid-$v sh -c 'dmesg | tail -3' ;
          done

    - &integration_perfect_rollup_parallel_batch_index
      name: "perfect rollup parallel batch index integration test"
      services: *integration_test_services
      env: TESTNG_GROUPS='-Dgroups=perfect-rollup-parallel-batch-index'
      script: *run_integration_test
      after_failure: *integration_test_diags

    - &integration_kafka_index
      name: "kafka index integration test"
      services: *integration_test_services
      env: TESTNG_GROUPS='-Dgroups=kafka-index'
      script: *run_integration_test
      after_failure: *integration_test_diags

    - &integration_query
      name: "query integration test"
      services: *integration_test_services
      env: TESTNG_GROUPS='-Dgroups=query'
      script: *run_integration_test
      after_failure: *integration_test_diags

    - &integration_realtime_index
      name: "realtime index integration test"
      services: *integration_test_services
      env: TESTNG_GROUPS='-Dgroups=realtime-index'
      script: *run_integration_test
      after_failure: *integration_test_diags

    - &integration_tests
      name: "other integration test"
      services: *integration_test_services
      env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index'
      script: *run_integration_test
      after_failure: *integration_test_diags