2018-11-13 12:38:37 -05:00
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
2014-12-19 18:53:19 -05:00
# Build environment: a Java project built on the xenial image with OpenJDK 8 unless a job overrides `jdk`.
language: java

sudo: true
dist: xenial

jdk:
  - openjdk8
2015-01-07 18:07:14 -05:00
2015-08-07 21:05:25 -04:00
# Keep the local Maven repository between builds.
cache:
  directories:
    - $HOME/.m2
2018-07-31 15:56:57 -04:00
2019-08-07 12:52:42 -04:00
# Environment variables shared by every job.
env:
  global:
    # for integration tests
    - DOCKER_IP=127.0.0.1
    # Maven invocation used by all scripts below
    - MVN="mvn -B"
    # Various options to make execution of maven goals faster (e.g., mvn install)
    - >
      MAVEN_SKIP="-Pskip-static-checks -Ddruid.console.skip=true -Dmaven.javadoc.skip=true"
    - MAVEN_SKIP_TESTS="-Pskip-tests"
2019-08-07 12:52:42 -04:00
# 'mvn install' is run with options that keep it fast, including skipping the javascript compile
# (-Ddruid.console.skip=true), which is not needed here. Because "mvn -q install" can, depending on network speed,
# stay silent for longer than the default 10 minute timeout, travis_wait is used to extend the timeout.
install: MAVEN_OPTS='-Xmx3000m' travis_wait 15 ${MVN} clean install -q -ff ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} -T1C
2019-08-07 12:52:42 -04:00
2020-02-11 16:43:08 -05:00
# Cron-triggered builds run only the "cron" stage; every other build type runs the "test" stage.
stages:
  # jobs that do not specify a stage get this default value
  - name: test
    if: type != cron
  - name: cron
    if: type = cron
2019-08-28 13:29:13 -04:00
jobs :
2017-05-31 21:27:34 -04:00
include :
2019-08-07 12:52:42 -04:00
# Static analysis jobs: each one runs a single set of Maven plugin goals with --fail-at-end.
- name: "animal sniffer checks"
  script: ${MVN} animal-sniffer:check --fail-at-end

- name: "checkstyle"
  script: ${MVN} checkstyle:checkstyle --fail-at-end

- name: "enforcer checks"
  script: ${MVN} enforcer:enforce --fail-at-end

- name: "forbidden api checks"
  script: ${MVN} forbiddenapis:check forbiddenapis:testCheck --fail-at-end

- name: "pmd checks"
  script: ${MVN} pmd:check --fail-at-end  # TODO: consider adding pmd:cpd-check

# The benchmarks module is excluded from the spotbugs run.
- name: "spotbugs checks"
  script: ${MVN} spotbugs:check --fail-at-end -pl '!benchmarks'
2019-08-07 12:52:42 -04:00
# License checks: runs apache-rat, then generates the dependency license reports and validates them.
- name: "license checks"
  install: skip
  # Installs Python 3, pip, setuptools, wheel and pyyaml. Anchored so that other jobs can reuse the same setup.
  before_script: &setup_generate_license
    - sudo apt-get update && sudo apt-get install python3 python3-pip python3-setuptools -y
    - pip3 install wheel  # install wheel first explicitly
    - pip3 install pyyaml
  script:
    - >
      ${MVN} apache-rat:check -Prat --fail-at-end
      -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn
      -Drat.consoleOutput=true
    # Generate dependency reports and checks they are valid. When running on Travis CI, 2 cores are available
    # (https://docs.travis-ci.com/user/reference/overview/#virtualisation-environment-vs-operating-system).
    - mkdir -p target
    - distribution/bin/generate-license-dependency-reports.py . target --clean-maven-artifact-transfer --parallel 2
    - distribution/bin/check-licenses.py licenses.yaml target/license-reports
2019-08-07 12:52:42 -04:00
2020-05-20 12:31:37 -04:00
# Compiles main and test sources with the "strict" profile; the shared install step is skipped.
- name: "(openjdk8) strict compilation"
  install: skip
  # Strict compilation requires more than 2 GB
  script: >
    MAVEN_OPTS='-Xmx3000m' ${MVN} clean -Pstrict compile test-compile --fail-at-end
    -pl '!benchmarks' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS}
2017-06-26 21:51:48 -04:00
2019-09-09 17:37:21 -04:00
# Runs dependency:analyze with warnings treated as failures. When it fails, remediation guidance is echoed and
# the trailing `&& false` keeps the job failed.
- name: "analyze dependencies"
  script: |-
    MAVEN_OPTS='-Xmx3000m' ${MVN} ${MAVEN_SKIP} dependency:analyze -DoutputXML=true -DignoreNonCompile=true -DfailOnWarning=true || { echo "
    The dependency analysis has found a dependency that is either :
    1) Used and undeclared : These are available as a transitive dependency but should be explicitly
    added to the POM to ensure the dependency version. The XML to add the dependencies to the POM is
    shown above.
    2) Unused and declared : These are not needed and removing them from the POM will speed up the build
    and reduce the artifact size. The dependencies to remove are shown above.
    If there are false positive dependency analysis warnings, they can be suppressed :
    https://maven.apache.org/plugins/maven-dependency-plugin/analyze-mojo.html#usedDependencies
    https://maven.apache.org/plugins/maven-dependency-plugin/examples/exclude-dependencies-from-dependency-analysis.html
    For more information, refer to :
    https://maven.apache.org/plugins/maven-dependency-plugin/analyze-mojo.html
    " && false; }
2019-12-05 17:34:35 -05:00
2020-02-19 03:34:19 -05:00
# Runs the IntelliJ inspection image against the project POM with the Druid inspection profile. The working
# directory and ~/.m2 are mounted into the container.
- name: "intellij inspections"
  script: >
    docker run --rm
    -v $(pwd):/project
    -v ~/.m2:/home/inspect/.m2
    ccaominh/intellij-inspect:1.0.0
    /project/pom.xml
    /project/.idea/inspectionProfiles/Druid.xml
    --levels ERROR
    --scope JavaInspectionsScope
2019-08-28 13:29:13 -04:00
# Packaging check: full `clean install` with the rat, dist and bundle-contrib-exts profiles.
# ${MAVEN_SKIP} carries -Ddruid.console.skip=true; the later -Ddruid.console.skip=false is presumably meant to
# override it so the web console is built for packaging (confirm Maven's precedence for repeated -D flags).
- &package
  name: "(openjdk8) packaging check"
  install: skip
  before_script: *setup_generate_license
  script: >
    MAVEN_OPTS='-Xmx3000m' ${MVN} clean install -Prat -Pdist -Pbundle-contrib-exts --fail-at-end
    -pl '!benchmarks' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} -Ddruid.console.skip=false -T1C

# Same job on OpenJDK 11.
- <<: *package
  name: "(openjdk11) packaging check"
  jdk: openjdk11
# Unit-test job template, anchored as `test_processing_module` and merged into the other module test jobs.
#   env.MAVEN_PROJECTS                 comma-separated Maven project list passed to `-pl`; entries may be
#                                      prefixed with "!" to exclude instead of include.
#   DRUID_USE_DEFAULT_VALUE_FOR_NULL   exported in before_script and forwarded to the tests as
#                                      -Ddruid.generic.useDefaultValueForNull.
# Steps: run the tests, print memory diagnostics, build the JaCoCo report, then check diff coverage for the
# .java files changed relative to ${TRAVIS_BRANCH} that belong to the tested projects.
- &test_processing_module
  name: "(openjdk8) processing module test"
  env:
    - MAVEN_PROJECTS='processing'
  before_script:
    - export DRUID_USE_DEFAULT_VALUE_FOR_NULL=true
  script:
    - unset _JAVA_OPTIONS
    # Set MAVEN_OPTS for Surefire launcher. Skip remoteresources to avoid intermittent connection timeouts when
    # resolving the SIGAR dependency.
    - >
      MAVEN_OPTS='-Xmx800m' ${MVN} test -pl ${MAVEN_PROJECTS}
      ${MAVEN_SKIP} -Dremoteresources.skip=true -Ddruid.generic.useDefaultValueForNull=${DRUID_USE_DEFAULT_VALUE_FOR_NULL}
    # Surface OOM-killer activity in the build log. `grep -E` replaces the obsolescent `egrep`; `|| exit 0`
    # keeps the step green when nothing matches.
    - sh -c "dmesg | grep -E -i '(oom|out of memory|kill process|killed).*' -C 1 || exit 0"
    - free -m
    - ${MVN} -pl ${MAVEN_PROJECTS} jacoco:report
    # Add merge target branch to determine diff (see https://github.com/travis-ci/travis-ci/issues/6069)
    - echo "TRAVIS_BRANCH=${TRAVIS_BRANCH}"  # for debugging
    - git remote set-branches --add origin ${TRAVIS_BRANCH} && git fetch
    # Determine the modified files that match the maven projects being tested. We use maven project lists that
    # either exclude (starts with "!") or include (does not start with "!"), so both cases need to be handled.
    # grep exits 1 when nothing matches; `|| [[ $? == 1 ]]` treats that as success while still failing on errors.
    - all_files="$(git diff --name-only origin/${TRAVIS_BRANCH}...HEAD | grep "\.java$" || [[ $? == 1 ]])"
    - for f in ${all_files}; do echo $f; done  # for debugging
    # The folded scalar below collapses to a single shell line, hence the explicit `;` separators.
    - >
      if [[ "${MAVEN_PROJECTS}" = \!* ]]; then
      regex="${MAVEN_PROJECTS:1}";
      regex="^${regex//,\!/\\|^}";
      project_files="$(echo "${all_files}" | grep -v "${regex}" || [[ $? == 1 ]])";
      else
      regex="^${MAVEN_PROJECTS//,/\\|^}";
      project_files="$(echo "${all_files}" | grep "${regex}" || [[ $? == 1 ]])";
      fi
    - for f in ${project_files}; do echo $f; done  # for debugging
    # Check diff code coverage for the maven projects being tested (retry install in case of network error).
    # Currently, the function coverage check is not reliable, so it is disabled.
    - >
      if [ -n "${project_files}" ]; then
      travis_retry npm install @connectis/diff-test-coverage@1.5.3
      && git diff origin/${TRAVIS_BRANCH}...HEAD -- ${project_files}
      | node_modules/.bin/diff-test-coverage
      --coverage "**/target/site/jacoco/jacoco.xml"
      --type jacoco
      --line-coverage 50
      --branch-coverage 50
      --function-coverage 0
      --log-template "coverage-lines-complete"
      --log-template "coverage-files-complete"
      --log-template "totals-complete"
      --log-template "errors"
      --
      || { printf "\nDiff code coverage check failed. To view coverage report, run 'mvn clean test jacoco:report' and open 'target/site/jacoco/index.html'\n" && false; }
      fi
  after_success:
    # retry in case of network error; --fail makes curl exit non-zero on an HTTP error response so that
    # travis_retry actually retries instead of saving the error page as codecov.sh
    - travis_retry curl --fail -o codecov.sh -s https://codecov.io/bash
    - travis_retry bash codecov.sh -X gcov
2017-06-26 21:51:48 -04:00
2019-08-28 13:29:13 -04:00
# Processing module tests on OpenJDK 11.
- <<: *test_processing_module
  name: "(openjdk11) processing module test"
  jdk: openjdk11

# SQL-compatibility variant: identical job, but DRUID_USE_DEFAULT_VALUE_FOR_NULL is exported as false.
# The before_script is anchored for reuse by the other SQL-compatibility jobs.
- &test_processing_module_sqlcompat
  <<: *test_processing_module
  name: "(openjdk8) processing module test (SQL Compatibility)"
  before_script: &setup_sqlcompat
    - export DRUID_USE_DEFAULT_VALUE_FOR_NULL=false

- <<: *test_processing_module_sqlcompat
  name: "(openjdk11) processing module test (SQL Compatibility)"
  jdk: openjdk11
# Indexing module tests: the processing test template with MAVEN_PROJECTS pointed at the indexing modules.
- &test_indexing_module
  <<: *test_processing_module
  name: "(openjdk8) indexing modules test"
  env:
    - MAVEN_PROJECTS='indexing-hadoop,indexing-service,extensions-core/kafka-indexing-service,extensions-core/kinesis-indexing-service'

- <<: *test_indexing_module
  name: "(openjdk11) indexing modules test"
  jdk: openjdk11

# SQL-compatibility variants reuse the shared `setup_sqlcompat` before_script.
- &test_indexing_module_sqlcompat
  <<: *test_indexing_module
  name: "(openjdk8) indexing modules test (SQL Compatibility)"
  before_script: *setup_sqlcompat

- <<: *test_indexing_module_sqlcompat
  name: "(openjdk11) indexing modules test (SQL Compatibility)"
  jdk: openjdk11
# Server module tests: the processing test template with MAVEN_PROJECTS set to 'server'.
- &test_server_module
  <<: *test_processing_module
  name: "(openjdk8) server module test"
  env:
    - MAVEN_PROJECTS='server'

- <<: *test_server_module
  name: "(openjdk11) server module test"
  jdk: openjdk11

# SQL-compatibility variants reuse the shared `setup_sqlcompat` before_script.
- &test_server_module_sqlcompat
  <<: *test_server_module
  name: "(openjdk8) server module test (SQL Compatibility)"
  before_script: *setup_sqlcompat

- <<: *test_server_module_sqlcompat
  name: "(openjdk11) server module test (SQL Compatibility)"
  jdk: openjdk11
2020-05-20 12:31:37 -04:00
# Tests for everything else: MAVEN_PROJECTS is an exclusion list ("!"-prefixed) naming the modules that have
# their own test jobs, plus web-console.
- &test_other_modules
  <<: *test_processing_module
  name: "(openjdk8) other modules test"
  env:
    - MAVEN_PROJECTS='!processing,!indexing-hadoop,!indexing-service,!extensions-core/kafka-indexing-service,!extensions-core/kinesis-indexing-service,!server,!web-console'

- <<: *test_other_modules
  name: "(openjdk11) other modules test"
  jdk: openjdk11

# SQL-compatibility variants reuse the shared `setup_sqlcompat` before_script.
- &test_other_modules_sqlcompat
  <<: *test_other_modules
  name: "(openjdk8) other modules test (SQL Compatibility)"
  before_script: *setup_sqlcompat

- <<: *test_other_modules_sqlcompat
  name: "(openjdk11) other modules test (SQL Compatibility)"
  jdk: openjdk11
Web console basic end-to-end-test (#9595)
Load data and query (i.e., automate
https://druid.apache.org/docs/latest/tutorials/tutorial-batch.html) to
have some basic checks ensuring the web console is wired up to druid
correctly.
The new end-to-end tests (tutorial-batch.spec.ts) are added to
`web-console/e2e-tests`. Within that directory:
- `components` represent the various tabs of the web console. Currently,
abstractions for `load data`, `ingestion`, `datasources`, and `query`
are implemented.
- `components/load-data/data-connector` contains abstractions for the
different data source options available to the data loader's `Connect`
step. Currently, only the `Local file` data source connector is
implemented.
- `components/load-data/config` contains abstractions for the different
configuration options available for each step of the data loader flow.
Currently, the `Configure Schema`, `Partition`, and `Publish` steps
have initial implementation of their configuration options.
- `util` contains various helper methods for the tests and does not
contain abstractions of the web console.
Changes to add the new tests to CI:
- `.travis.yml`: New "web console end-to-end tests" job
- `web-console/jest.*.js`: Refactor jest configurations to have
different flavors for unit tests and for end-to-end tests. In
particular, the latter adds a jest setup configuration to wait for the
web console to be ready (`web-console/e2e-tests/util/setup.ts`).
- `web-console/package.json`: Refactor run scripts to add new script for
running end-to-end tests.
- `web-console/script/druid`: Utility scripts for building, starting,
and stopping druid.
Other changes:
- `pom.xml`: Refactor various settings disable java static checks and to
disable java tests into two new maven profiles. Since the same
settings are used in several places (e.g., .travis.yml, Dockerfiles,
etc.), having them in maven profiles makes it more maintainable.
- `web-console/src/console-application.tsx`: Fix typo ("the the").
2020-04-09 15:38:09 -04:00
# Runs the web-console module's tests through Maven and uploads coverage on success.
- name: "web console"
  install: skip
  script:
    - ${MVN} test -pl 'web-console'
  after_success:
    # retry in case of network error
    - (cd web-console && travis_retry npm run codecov)
2018-08-02 11:20:25 -04:00
Web console basic end-to-end-test (#9595)
Load data and query (i.e., automate
https://druid.apache.org/docs/latest/tutorials/tutorial-batch.html) to
have some basic checks ensuring the web console is wired up to druid
correctly.
The new end-to-end tests (tutorial-batch.spec.ts) are added to
`web-console/e2e-tests`. Within that directory:
- `components` represent the various tabs of the web console. Currently,
abstractions for `load data`, `ingestion`, `datasources`, and `query`
are implemented.
- `components/load-data/data-connector` contains abstractions for the
different data source options available to the data loader's `Connect`
step. Currently, only the `Local file` data source connector is
implemented.
- `components/load-data/config` contains abstractions for the different
configuration options available for each step of the data loader flow.
Currently, the `Configure Schema`, `Partition`, and `Publish` steps
have initial implementation of their configuration options.
- `util` contains various helper methods for the tests and does not
contain abstractions of the web console.
Changes to add the new tests to CI:
- `.travis.yml`: New "web console end-to-end tests" job
- `web-console/jest.*.js`: Refactor jest configurations to have
different flavors for unit tests and for end-to-end tests. In
particular, the latter adds a jest setup configuration to wait for the
web console to be ready (`web-console/e2e-tests/util/setup.ts`).
- `web-console/package.json`: Refactor run scripts to add new script for
running end-to-end tests.
- `web-console/script/druid`: Utility scripts for building, starting,
and stopping druid.
Other changes:
- `pom.xml`: Refactor various settings disable java static checks and to
disable java tests into two new maven profiles. Since the same
settings are used in several places (e.g., .travis.yml, Dockerfiles,
etc.), having them in maven profiles makes it more maintainable.
- `web-console/src/console-application.tsx`: Fix typo ("the the").
2020-04-09 15:38:09 -04:00
# End-to-end test of the web console: build druid, start it, run the e2e suite, then stop druid.
# The `setup_generate_license` steps are reused here as before_install.
- name: "web console end-to-end test"
  before_install: *setup_generate_license
  install: web-console/script/druid build
  before_script: web-console/script/druid start
  script: (cd web-console && npm run test-e2e)
  after_script: web-console/script/druid stop
2019-08-21 00:48:59 -04:00
# Lints and spell-checks the website sources. On failure, guidance is echoed and `&& false` keeps the job failed.
- name: "docs"
  install: (cd website && npm install)
  script: |-
    (cd website && npm run lint && npm run spellcheck) || { echo "
    If there are spell check errors :
    1) Suppressing False Positives : Edit website/.spelling to add suppressions. Instructions
    are at the top of the file and explain how to suppress false positives either globally or
    within a particular file.
    2) Running Spell Check Locally : cd website && npm install && npm run spellcheck
    For more information, refer to : https://www.npmjs.com/package/markdown-spellcheck
    " && false; }
2019-08-21 00:48:59 -04:00
2020-02-12 19:36:31 -05:00
# Integration tests Java Compile version is set by the machine environment jdk (set by the jdk key)
# Integration tests Java Runtime version is set by the JVM_RUNTIME env property (set env key to -Djvm.runtime=<JVM_RUNTIME_VERSION>)
# (Currently integration tests only support running with jvm runtime 8 and 11)
# START - Integration tests for Compile with Java 8 and Run with Java 8
2019-08-28 13:29:13 -04:00
# Batch index integration test. This job also defines the anchors shared by the other integration test jobs:
# `integration_test_services`, `run_integration_test` and `integration_test_diags`.
- &integration_batch_index
  name: "(Compile=openjdk8, Run=openjdk8) batch index integration test"
  jdk: openjdk8
  services: &integration_test_services
    - docker
  env: TESTNG_GROUPS='-Dgroups=batch-index' JVM_RUNTIME='-Djvm.runtime=8'
  # TESTNG_GROUPS and JVM_RUNTIME come from the job's `env` and are passed through to Maven as-is.
  script: &run_integration_test
    - ${MVN} verify -pl integration-tests -P integration-tests ${TESTNG_GROUPS} ${JVM_RUNTIME} ${MAVEN_SKIP}
  # On failure, print the tail of each shared log file and the last dmesg lines from each druid container.
  # Each entry folds into a single shell line.
  after_failure: &integration_test_diags
    - >-
      for v in ~/shared/logs/*.log ; do
      echo $v logtail ======================== ; tail -100 $v ;
      done
    - >-
      for v in broker middlemanager overlord router coordinator historical ; do
      echo $v dmesg ======================== ;
      docker exec -it druid-$v sh -c 'dmesg | tail -3' ;
      done
2018-09-19 12:56:15 -04:00
Parallel indexing single dim partitions (#8925)
* Parallel indexing single dim partitions
Implements single dimension range partitioning for native parallel batch
indexing as described in #8769. This initial version requires the
druid-datasketches extension to be loaded.
The algorithm has 5 phases that are orchestrated by the supervisor in
`ParallelIndexSupervisorTask#runRangePartitionMultiPhaseParallel()`.
These phases and the main classes involved are described below:
1) In parallel, determine the distribution of dimension values for each
input source split.
`PartialDimensionDistributionTask` uses `StringSketch` to generate
the approximate distribution of dimension values for each input
source split. If the rows are ungrouped,
`PartialDimensionDistributionTask.UngroupedRowDimensionValueFilter`
uses a Bloom filter to skip rows that would be grouped. The final
distribution is sent back to the supervisor via
`DimensionDistributionReport`.
2) The range partitions are determined.
In `ParallelIndexSupervisorTask#determineAllRangePartitions()`, the
supervisor uses `StringSketchMerger` to merge the individual
`StringSketch`es created in the preceding phase. The merged sketch is
then used to create the range partitions.
3) In parallel, generate partial range-partitioned segments.
`PartialRangeSegmentGenerateTask` uses the range partitions
determined in the preceding phase and
`RangePartitionCachingLocalSegmentAllocator` to generate
`SingleDimensionShardSpec`s. The partition information is sent back
to the supervisor via `GeneratedGenericPartitionsReport`.
4) The partial range segments are grouped.
In `ParallelIndexSupervisorTask#groupGenericPartitionLocationsPerPartition()`,
the supervisor creates the `PartialGenericSegmentMergeIOConfig`s
necessary for the next phase.
5) In parallel, merge partial range-partitioned segments.
`PartialGenericSegmentMergeTask` uses `GenericPartitionLocation` to
retrieve the partial range-partitioned segments generated earlier and
then merges and publishes them.
* Fix dependencies & forbidden apis
* Fixes for integration test
* Address review comments
* Fix docs, strict compile, sketch check, rollup check
* Fix first shard spec, partition serde, single subtask
* Fix first partition check in test
* Misc rewording/refactoring to address code review
* Fix doc link
* Split batch index integration test
* Do not run parallel-batch-index twice
* Adjust last partition
* Split ITParallelIndexTest to reduce runtime
* Rename test class
* Allow null values in range partitions
* Indicate which phase failed
* Improve asserts in tests
2019-12-10 02:05:49 -05:00
# Integration tests for the perfect-rollup-parallel-batch-index TestNG group (compile and run on Java 8).
- &integration_perfect_rollup_parallel_batch_index
  name: "(Compile=openjdk8, Run=openjdk8) perfect rollup parallel batch index integration test"
  jdk: openjdk8
  services: *integration_test_services
  env: TESTNG_GROUPS='-Dgroups=perfect-rollup-parallel-batch-index' JVM_RUNTIME='-Djvm.runtime=8'
  script: *run_integration_test
  after_failure: *integration_test_diags
2019-08-28 13:29:13 -04:00
# Kafka integration tests (compile and run on Java 8). Each job selects one TestNG group and reuses the shared
# services, script and failure diagnostics.
- &integration_kafka_index
  name: "(Compile=openjdk8, Run=openjdk8) kafka index integration test"
  jdk: openjdk8
  services: *integration_test_services
  env: TESTNG_GROUPS='-Dgroups=kafka-index' JVM_RUNTIME='-Djvm.runtime=8'
  script: *run_integration_test
  after_failure: *integration_test_diags

- &integration_kafka_index_slow
  name: "(Compile=openjdk8, Run=openjdk8) kafka index integration test slow"
  jdk: openjdk8
  services: *integration_test_services
  env: TESTNG_GROUPS='-Dgroups=kafka-index-slow' JVM_RUNTIME='-Djvm.runtime=8'
  script: *run_integration_test
  after_failure: *integration_test_diags

- &integration_kafka_transactional_index
  name: "(Compile=openjdk8, Run=openjdk8) transactional kafka index integration test"
  jdk: openjdk8
  services: *integration_test_services
  env: TESTNG_GROUPS='-Dgroups=kafka-transactional-index' JVM_RUNTIME='-Djvm.runtime=8'
  script: *run_integration_test
  after_failure: *integration_test_diags

- &integration_kafka_transactional_index_slow
  name: "(Compile=openjdk8, Run=openjdk8) transactional kafka index integration test slow"
  jdk: openjdk8
  services: *integration_test_services
  env: TESTNG_GROUPS='-Dgroups=kafka-transactional-index-slow' JVM_RUNTIME='-Djvm.runtime=8'
  script: *run_integration_test
  after_failure: *integration_test_diags

- &integration_kafka_format_tests
  name: "(Compile=openjdk8, Run=openjdk8) Kafka index integration test with various formats"
  jdk: openjdk8
  services: *integration_test_services
  env: TESTNG_GROUPS='-Dgroups=kafka-data-format' JVM_RUNTIME='-Djvm.runtime=8'
  script: *run_integration_test
  after_failure: *integration_test_diags
2019-08-28 13:29:13 -04:00
- &integration_query
2020-02-12 19:36:31 -05:00
name : "(Compile=openjdk8, Run=openjdk8) query integration test"
jdk : openjdk8
2019-08-07 12:52:42 -04:00
services : *integration_test_services
2020-02-12 19:36:31 -05:00
env : TESTNG_GROUPS='-Dgroups=query' JVM_RUNTIME='-Djvm.runtime=8'
2019-08-07 12:52:42 -04:00
script : *run_integration_test
after_failure : *integration_test_diags
2020-03-17 06:08:44 -04:00
# Java 8 integration-test job for the TestNG group `security`.
# Anchored as `integration_security` so the Java 11 runtime variant can merge
# it in and override only name/env. Services, script and failure diagnostics
# are aliases to shared definitions elsewhere in this file.
- &integration_security
  name: "(Compile=openjdk8, Run=openjdk8) security integration test"
  jdk: openjdk8
  # Selects the TestNG group to run and the JVM version used at run time.
  env: "TESTNG_GROUPS='-Dgroups=security' JVM_RUNTIME='-Djvm.runtime=8'"
  services: *integration_test_services
  script: *run_integration_test
  after_failure: *integration_test_diags
2019-08-28 13:29:13 -04:00
# Java 8 integration-test job for the TestNG group `realtime-index`.
# Anchored as `integration_realtime_index` so the Java 11 runtime variant can
# merge it in and override only name/env. Services, script and failure
# diagnostics are aliases to shared definitions elsewhere in this file.
- &integration_realtime_index
  name: "(Compile=openjdk8, Run=openjdk8) realtime index integration test"
  jdk: openjdk8
  services: *integration_test_services
  # Selects the TestNG group to run and the JVM version used at run time.
  env: TESTNG_GROUPS='-Dgroups=realtime-index' JVM_RUNTIME='-Djvm.runtime=8'
  script: *run_integration_test
  after_failure: *integration_test_diags
2019-08-28 13:29:13 -04:00
# Java 8 catch-all integration-test job: runs every TestNG group that is NOT
# named in -DexcludedGroups. Several of the excluded groups (batch-index,
# query, security, realtime-index, the kafka groups, ...) have dedicated jobs
# in this file. The same exclusion list is repeated in the Java 11 "other
# integration test" entry, so the two must be edited together.
- &integration_tests
  name: "(Compile=openjdk8, Run=openjdk8) other integration test"
  jdk: openjdk8
  services: *integration_test_services
  # Kept on one line: the group list is comma-separated without spaces, so it
  # cannot be folded across lines without changing the value.
  env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=8'
  script: *run_integration_test
  after_failure: *integration_test_diags
2020-02-12 19:36:31 -05:00
# END - Integration tests for Compile with Java 8 and Run with Java 8
2020-02-11 16:43:08 -05:00
2020-03-11 12:22:27 -04:00
# START - Integration tests for Compile with Java 8 and Run with Java 11
2020-02-12 19:36:31 -05:00
# Java 11 runtime variant of the batch index integration test. The merge key
# pulls in every key of the job anchored as `integration_batch_index` (defined
# elsewhere in this file); the keys below override it so the tests run with
# -Djvm.runtime=11 while the compile JDK stays openjdk8.
- <<: *integration_batch_index
  name: "(Compile=openjdk8, Run=openjdk11) batch index integration test"
  jdk: openjdk8
  env: TESTNG_GROUPS='-Dgroups=batch-index' JVM_RUNTIME='-Djvm.runtime=11'
# Java 11 runtime variant of the perfect rollup parallel batch index test. The
# merge key pulls in every key of the job anchored as
# `integration_perfect_rollup_parallel_batch_index` (defined elsewhere in this
# file); the keys below override it so the tests run with -Djvm.runtime=11
# while the compile JDK stays openjdk8.
- <<: *integration_perfect_rollup_parallel_batch_index
  name: "(Compile=openjdk8, Run=openjdk11) perfect rollup parallel batch index integration test"
  jdk: openjdk8
  env: TESTNG_GROUPS='-Dgroups=perfect-rollup-parallel-batch-index' JVM_RUNTIME='-Djvm.runtime=11'
# Java 11 runtime variant of the query integration test. The merge key pulls
# in every key of the job anchored as `integration_query`; the keys below
# override it so the tests run with -Djvm.runtime=11 while the compile JDK
# stays openjdk8.
- <<: *integration_query
  name: "(Compile=openjdk8, Run=openjdk11) query integration test"
  jdk: openjdk8
  env: TESTNG_GROUPS='-Dgroups=query' JVM_RUNTIME='-Djvm.runtime=11'
2020-03-17 06:08:44 -04:00
# Java 11 runtime variant of the security integration test. The merge key
# pulls in every key of the job anchored as `integration_security`; the keys
# below override it so the tests run with -Djvm.runtime=11 while the compile
# JDK stays openjdk8.
- <<: *integration_security
  name: "(Compile=openjdk8, Run=openjdk11) security integration test"
  env: "TESTNG_GROUPS='-Dgroups=security' JVM_RUNTIME='-Djvm.runtime=11'"
  jdk: openjdk8
2020-02-12 19:36:31 -05:00
# Java 11 runtime variant of the realtime index integration test. The merge
# key pulls in every key of the job anchored as `integration_realtime_index`;
# the keys below override it so the tests run with -Djvm.runtime=11 while the
# compile JDK stays openjdk8.
- <<: *integration_realtime_index
  name: "(Compile=openjdk8, Run=openjdk11) realtime index integration test"
  jdk: openjdk8
  env: TESTNG_GROUPS='-Dgroups=realtime-index' JVM_RUNTIME='-Djvm.runtime=11'
# Java 11 runtime variant of the catch-all "other" integration test. The merge
# key pulls in every key of the job anchored as `integration_tests`; the keys
# below override it so the tests run with -Djvm.runtime=11 while the compile
# JDK stays openjdk8. The -DexcludedGroups list duplicates the one in the
# Java 8 job and must be edited together with it.
- <<: *integration_tests
  name: "(Compile=openjdk8, Run=openjdk11) other integration test"
  jdk: openjdk8
  # Kept on one line: the group list is comma-separated without spaces, so it
  # cannot be folded across lines without changing the value.
  env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=11'
2020-03-11 12:22:27 -04:00
# END - Integration tests for Compile with Java 8 and Run with Java 11
2020-03-17 06:08:44 -04:00
2020-02-11 16:43:08 -05:00
# Job in the `cron` stage that runs the OWASP dependency-check Maven goal.
# ${MVN} is set to "mvn -B" under env.global at the top of this file.
- name : "security vulnerabilities"
stage : cron
# The install step is skipped for this job.
install : skip
# If dependency-check:check fails, the `|| { echo ... && false; }` branch
# prints remediation guidance and then returns a non-zero status, so the job
# is still reported as failed.
script : |-
${MVN} dependency-check:check || { echo "
The OWASP dependency check has found security vulnerabilities. Please use a newer version
of the dependency that does not have vulnerabilities. If the analysis has false positives,
they can be suppressed by adding entries to owasp-dependency-check-suppressions.xml (for more
information, see https://jeremylong.github.io/DependencyCheck/general/suppression.html).
" && false; }
# Travis CI only supports per-build (and not per-job) notifications: https://github.com/travis-ci/travis-ci/issues/9888
# E-mail notification settings for builds of this repository.
notifications :
email :
# Condition on the e-mail notification: only builds of type `cron` qualify.
if : type = cron
recipients :
# The recipient below is the string "dev@druid.apache.org", encrypted against the apache/druid repo so that
# forks are unable to use this notification:
# https://github.com/travis-ci/travis-ci/issues/1094#issuecomment-215019909
# https://github.com/travis-ci/travis-ci/issues/2711
- secure : "MupjX/0jLwh3XzHPl74BTk2/Kp5r+8TrEewfRhpQdWKFMBXLKNqu0k2VXf5C/NIg3uvPianq3REk+qeTHI8dL2ShjiWS/eIRkJOHLfObdNNBuos5fo4TxAuBQcXyT4VjAq5jnAkH84Pxf2Nl0rkisWoIhvwSX7+kNrjW1qdu7K0="
# NOTE(review): per the Travis CI notification docs, `change` means a mail is
# sent only when the build result differs from the previous build - confirm.
on_success : change
on_failure : change