Adding s3, gcs, azure integration tests (#9501)
* exclude pulling s3 segments for tests that don't need it
* fix script
* add s3 test
* refactor sample data script
* add tests
* add license header
* fix failing tests
* change bucket and path to config
* update integration test readme
* fix typo
parent b1847364b0
commit 4c620b8f1c
.travis.yml (19 lines changed)
@@ -324,6 +324,14 @@ jobs:
script: *run_integration_test
after_failure: *integration_test_diags

- &integration_security
name: "(Compile=openjdk8, Run=openjdk8) security integration test"
jdk: openjdk8
services: *integration_test_services
env: TESTNG_GROUPS='-Dgroups=security' JVM_RUNTIME='-Djvm.runtime=8'
script: *run_integration_test
after_failure: *integration_test_diags

- &integration_realtime_index
name: "(Compile=openjdk8, Run=openjdk8) realtime index integration test"
jdk: openjdk8
@@ -336,7 +344,7 @@ jobs:
name: "(Compile=openjdk8, Run=openjdk8) other integration test"
jdk: openjdk8
services: *integration_test_services
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index' JVM_RUNTIME='-Djvm.runtime=8'
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage' JVM_RUNTIME='-Djvm.runtime=8'
script: *run_integration_test
after_failure: *integration_test_diags
# END - Integration tests for Compile with Java 8 and Run with Java 8
@@ -362,6 +370,11 @@ jobs:
jdk: openjdk8
env: TESTNG_GROUPS='-Dgroups=query' JVM_RUNTIME='-Djvm.runtime=11'

- <<: *integration_security
name: "(Compile=openjdk8, Run=openjdk11) security integration test"
jdk: openjdk8
env: TESTNG_GROUPS='-Dgroups=security' JVM_RUNTIME='-Djvm.runtime=11'

- <<: *integration_realtime_index
name: "(Compile=openjdk8, Run=openjdk11) realtime index integration test"
jdk: openjdk8
@@ -370,9 +383,9 @@ jobs:
- <<: *integration_tests
name: "(Compile=openjdk8, Run=openjdk11) other integration test"
jdk: openjdk8
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index' JVM_RUNTIME='-Djvm.runtime=11'
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage' JVM_RUNTIME='-Djvm.runtime=11'
# END - Integration tests for Compile with Java 8 and Run with Java 11

- name: "security vulnerabilities"
stage: cron
install: skip

@@ -48,9 +48,10 @@ export DOCKER_IP=127.0.0.1

## Running tests

To run all the tests using docker and mvn run the following command:
To run all tests from a test group using docker and mvn run the following command:
(list of test groups can be found at integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java)
```
mvn verify -P integration-tests
mvn verify -P integration-tests -Dgroups=<test_group>
```

To run only a single test using mvn run the following command:
@@ -61,6 +62,12 @@ To run only a single test using mvn run the following command:
Add `-rf :druid-integration-tests` when running integration tests for the second time or later without changing
the code of core modules in between to skip up-to-date checks for the whole module dependency tree.

Integration tests can also be run with either Java 8 or Java 11 by adding -Djvm.runtime=# to the mvn command, where #
can be either 8 or 11.
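For example (illustrative), to run a test group on the Java 11 runtime:
```
mvn verify -P integration-tests -Dgroups=<test_group> -Djvm.runtime=11
```
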
Druid's configuration (using Docker) can be overridden by providing -Doverride.config.path=<PATH_TO_FILE>.
The file must contain one property per line; each key must start with druid_ and use snake case.

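As a minimal sketch (property names taken from the override-examples added in this change; values are placeholders), an override file for S3 deep storage could look like:
```
druid_storage_type=s3
druid_storage_bucket=<YOUR_BUCKET>
druid_storage_baseKey=druid/segments
druid_s3_accessKey=<YOUR_ACCESS_KEY>
druid_s3_secretKey=<YOUR_SECRET_KEY>
druid_extensions_loadList=["druid-s3-extensions"]
AWS_REGION=<YOUR_REGION>
```
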
Running Tests Using A Configuration File for Any Cluster
-------------------

@@ -79,6 +86,8 @@ To run tests on any druid cluster that is already running, create a configuratio
"coordinator_port": "<coordinator_port>",
"middlemanager_host": "<middle_manager_ip>",
"zookeeper_hosts": "<comma-separated list of zookeeper_ip:zookeeper_port>",
"cloud_bucket": "<(optional) cloud_bucket for test data if running cloud integration test>",
"cloud_path": "<(optional) cloud_path for test data if running cloud integration test>",
}

Set the environment variable CONFIG_FILE to the name of the configuration file:
@@ -86,9 +95,10 @@ Set the environment variable CONFIG_FILE to the name of the configuration file:
export CONFIG_FILE=<config file name>
```

To run all the tests using mvn run the following command:
To run all tests from a test group using mvn run the following command:
(list of test groups can be found at integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java)
```
mvn verify -P int-tests-config-file
mvn verify -P int-tests-config-file -Dgroups=<test_group>
```

To run only a single test using mvn run the following command:
@@ -96,6 +106,34 @@ To run only a single test using mvn run the following command:
mvn verify -P int-tests-config-file -Dit.test=<test_name>
```

Running a Test That Uses Cloud
-------------------
The integration tests that index from cloud storage or use cloud storage as deep storage are not run as part
of the integration test runs discussed above. Running these tests requires you to provide
your own cloud storage.

Currently, the integration tests support Google Cloud Storage, Amazon S3, and Microsoft Azure.
These can be run by providing "gcs-deep-storage", "s3-deep-storage", or "azure-deep-storage"
to -Dgroups for Google Cloud Storage, Amazon S3, and Microsoft Azure respectively. Note that only
one group should be run per mvn command.

In addition to specifying -Dgroups on the mvn command, the following will need to be provided:
1) Set the bucket and path for your test data. This can be done by setting -Ddruid.test.config.cloudBucket and
-Ddruid.test.config.cloudPath in the mvn command or setting "cloud_bucket" and "cloud_path" in the config file.
2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
located in integration-tests/src/test/resources/data/batch_index to your cloud storage at the location set in step 1.
3) Provide -Doverride.config.path=<PATH_TO_FILE> with your cloud credentials/configs set. See the
integration-tests/docker/environment-configs/override-examples/ directory for the env vars to provide for each cloud storage.

For Google Cloud Storage, in addition to the above, you will also have to:
1) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> pointing to a folder that contains your GOOGLE_APPLICATION_CREDENTIALS file

For example, to run the integration tests for Google Cloud Storage:
```
mvn verify -P integration-tests -Dgroups=gcs-deep-storage -Doverride.config.path=<PATH_TO_FILE> -Dresource.file.dir.path=<PATH_TO_FOLDER> -Ddruid.test.config.cloudBucket=test-bucket -Ddruid.test.config.cloudPath=test-data-folder/
```

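An S3 run looks analogous (illustrative; -Dresource.file.dir.path is only needed for Google Cloud Storage):
```
mvn verify -P integration-tests -Dgroups=s3-deep-storage -Doverride.config.path=<PATH_TO_FILE> -Ddruid.test.config.cloudBucket=test-bucket -Ddruid.test.config.cloudPath=test-data-folder/
```
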
Running a Test That Uses Hadoop
-------------------

@ -46,11 +46,7 @@ RUN ln -sf /usr/share/java/mysql-connector-java.jar /usr/local/druid/lib/mysql-c
|
||||
RUN find /var/lib/mysql -type f -exec touch {} \; && service mysql start \
|
||||
&& java -cp "/usr/local/druid/lib/*" -Ddruid.metadata.storage.type=mysql org.apache.druid.cli.Main tools metadata-init --connectURI="jdbc:mysql://localhost:3306/druid" --user=druid --password=diurd \
|
||||
&& /etc/init.d/mysql stop
|
||||
ADD sample-data.sql sample-data.sql
|
||||
# touch is needed because OverlayFS's copy-up operation breaks POSIX standards. See https://github.com/docker/for-linux/issues/72.
|
||||
RUN find /var/lib/mysql -type f -exec touch {} \; && service mysql start \
|
||||
&& cat sample-data.sql | mysql -u root druid \
|
||||
&& /etc/init.d/mysql stop
|
||||
ADD test-data /test-data
|
||||
|
||||
# Setup supervisord
|
||||
ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||
@ -101,6 +97,8 @@ WORKDIR /var/lib/druid
|
||||
ENTRYPOINT /tls/generate-server-certs-and-keystores.sh \
|
||||
# Create druid service config files with all the config variables
|
||||
&& . /druid.sh; setupConfig \
|
||||
# Some test groups require pre-existing data to be setup
|
||||
&& . /druid.sh; setupData \
|
||||
# Export the service config file path to use in supervisord conf file
|
||||
&& export DRUID_COMMON_CONF_DIR="$(. /druid.sh; getConfPath ${DRUID_SERVICE})" \
|
||||
# Export the common config file path to use in supervisord conf file
|
||||
|
@ -17,7 +17,8 @@
|
||||
|
||||
set -e
|
||||
|
||||
getConfPath() {
|
||||
getConfPath()
|
||||
{
|
||||
cluster_conf_base=/tmp/conf/druid/cluster
|
||||
case "$1" in
|
||||
_common) echo $cluster_conf_base/_common ;;
|
||||
@ -31,7 +32,8 @@ getConfPath() {
|
||||
}
|
||||
|
||||
# Delete the old key (if existing) and append new key=value
|
||||
setKey() {
|
||||
setKey()
|
||||
{
|
||||
service="$1"
|
||||
key="$2"
|
||||
value="$3"
|
||||
@ -45,7 +47,8 @@ setKey() {
|
||||
echo "Setting $key=$value in $service_conf"
|
||||
}
|
||||
|
||||
setupConfig() {
|
||||
setupConfig()
|
||||
{
|
||||
echo "$(date -Is) configuring service $DRUID_SERVICE"
|
||||
|
||||
# We put all the config in /tmp/conf to allow for a
|
||||
@ -63,7 +66,6 @@ setupConfig() {
|
||||
setKey $DRUID_SERVICE druid.host $(resolveip -s $HOSTNAME)
|
||||
setKey $DRUID_SERVICE druid.worker.ip $(resolveip -s $HOSTNAME)
|
||||
|
||||
|
||||
# Write out all the environment variables starting with druid_ to druid service config file
|
||||
# This will replace _ with . in the key
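# e.g. druid_processing_numThreads=2 in the environment becomes druid.processing.numThreads=2 in the service config file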
|
||||
env | grep ^druid_ | while read evar;
|
||||
@ -73,4 +75,23 @@ setupConfig() {
|
||||
var=$(echo "$evar" | sed -e 's?^\([^=]*\)=.*?\1?g' -e 's?_?.?g')
|
||||
setKey $DRUID_SERVICE "$var" "$val"
|
||||
done
|
||||
}
|
||||
}
|
||||
|
||||
setupData()
|
||||
{
|
||||
# The "query" and "security" test groups require data to be setup before running the tests.
|
||||
# In particular, they requires segments to be download from a pre-existing s3 bucket.
|
||||
# This is done by using the loadSpec put into metadatastore and s3 credientials set below.
|
||||
if [ "$DRUID_INTEGRATION_TEST_GROUP" = "query" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "security" ]; then
|
||||
# touch is needed because OverlayFS's copy-up operation breaks POSIX standards. See https://github.com/docker/for-linux/issues/72.
|
||||
find /var/lib/mysql -type f -exec touch {} \; && service mysql start \
|
||||
&& cat /test-data/${DRUID_INTEGRATION_TEST_GROUP}-sample-data.sql | mysql -u root druid && /etc/init.d/mysql stop
|
||||
# The s3 credentials below are needed to access the pre-existing s3 bucket
|
||||
setKey $DRUID_SERVICE druid.s3.accessKey AKIAJI7DG7CDECGBQ6NA
|
||||
setKey $DRUID_SERVICE druid.s3.secretKey OBaLISDFjKLajSTrJ53JoTtzTZLjPlRePcwa+Pjv
|
||||
setKey $DRUID_SERVICE druid.extensions.loadList [\"druid-s3-extensions\"]
|
||||
# The region of the sample data s3 blobs needed for these test groups
|
||||
export AWS_REGION=us-east-1
|
||||
fi
|
||||
}
|
||||
|
||||
|
@ -26,6 +26,8 @@ COMMON_DRUID_JAVA_OPTS=-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Dlog4j.configu
|
||||
DRUID_DEP_LIB_DIR=/shared/hadoop_xml/*:/shared/docker/lib/*:/usr/local/druid/lib/mysql-connector-java.jar
|
||||
|
||||
# Druid configs
|
||||
druid_extensions_loadList=[]
|
||||
druid_extensions_directory=/shared/docker/extensions
|
||||
druid_auth_authenticator_basic_authorizerName=basic
|
||||
druid_auth_authenticator_basic_initialAdminPassword=priest
|
||||
druid_auth_authenticator_basic_initialInternalClientPassword=warlock
|
||||
|
@ -24,8 +24,6 @@ DRUID_LOG_PATH=/shared/logs/historical.log
|
||||
SERVICE_DRUID_JAVA_OPTS=-server -Xmx512m -Xms512m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:+UseG1GC
|
||||
|
||||
# Druid configs
|
||||
druid_s3_accessKey=AKIAJI7DG7CDECGBQ6NA
|
||||
druid_s3_secretKey=OBaLISDFjKLajSTrJ53JoTtzTZLjPlRePcwa+Pjv
|
||||
druid_processing_buffer_sizeBytes=25000000
|
||||
druid_processing_numThreads=2
|
||||
druid_query_groupBy_maxOnDiskStorage=300000000
|
||||
|
@ -32,8 +32,6 @@ druid_indexer_runner_javaOptsArray=["-server", "-Xmx256m", "-Xms256m", "-XX:NewS
|
||||
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=25000000
|
||||
druid_indexer_fork_property_druid_processing_numThreads=1
|
||||
druid_indexer_fork_server_http_numThreads=20
|
||||
druid_s3_accessKey=AKIAJI7DG7CDECGBQ6NA
|
||||
druid_s3_secretKey=OBaLISDFjKLajSTrJ53JoTtzTZLjPlRePcwa+Pjv
|
||||
druid_selectors_indexing_serviceName=druid/overlord
|
||||
druid_indexer_task_chathandler_type=announce
|
||||
druid_auth_basic_common_cacheDirectory=/tmp/authCache/middleManager
|
||||
|
@ -0,0 +1,28 @@
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
|
||||
#
|
||||
# Example of override config file to provide.
|
||||
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
|
||||
#
|
||||
druid_storage_type=azure
|
||||
druid_azure_account=<OVERRIDE_THIS>
|
||||
druid_azure_key=<OVERRIDE_THIS>
|
||||
druid_azure_container=<OVERRIDE_THIS>
|
||||
druid_extensions_loadList=["druid-azure-extensions"]
|
@ -0,0 +1,29 @@
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
|
||||
#
|
||||
# Example of override config file to provide.
|
||||
# Please replace <OVERRIDE_THIS> and <YOUR_GOOGLE_CREDENTIALS_FILE_NAME> with your cloud configs/credentials
|
||||
#
|
||||
druid_storage_type=google
|
||||
druid_google_bucket=<OVERRIDE_THIS>
|
||||
druid_google_prefix=<OVERRIDE_THIS>
|
||||
druid_extensions_loadList=["druid-google-extensions"]
|
||||
GOOGLE_APPLICATION_CREDENTIALS=/shared/docker/credentials/<YOUR_GOOGLE_CREDENTIALS_FILE_NAME>
|
||||
|
@ -0,0 +1,30 @@
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
|
||||
#
|
||||
# Example of override config file to provide.
|
||||
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
|
||||
#
|
||||
druid_storage_type=s3
|
||||
druid_storage_bucket=<OVERRIDE_THIS>
|
||||
druid_storage_baseKey=druid/segments
|
||||
druid_s3_accessKey=<OVERRIDE_THIS>
|
||||
druid_s3_secretKey=<OVERRIDE_THIS>
|
||||
AWS_REGION=<OVERRIDE_THIS>
|
||||
druid_extensions_loadList=["druid-s3-extensions"]
|
@ -4,4 +4,3 @@ redirect_stderr=true
|
||||
priority=100
|
||||
autorestart=false
|
||||
stdout_logfile=%(ENV_DRUID_LOG_PATH)s
|
||||
environment=AWS_REGION=us-east-1
|
||||
|
@ -18,5 +18,3 @@ INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,ver
|
||||
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9','twitterstream','2013-05-13T00:03:48.807Z','2013-01-03T00:00:00.000Z','2013-01-04T00:00:00.000Z',0,'2013-01-04T04:09:13.590Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z\",\"version\":\"2013-01-04T04:09:13.590Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z/2013-01-04T04:09:13.590Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":411651320,\"identifier\":\"twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9\"}');
|
||||
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9','wikipedia_editstream','2013-03-15T20:49:52.348Z','2012-12-29T00:00:00.000Z','2013-01-10T08:00:00.000Z',0,'2013-01-10T08:13:47.830Z_v9',1,'{\"dataSource\":\"wikipedia_editstream\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia_editstream/2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z/2013-01-10T08:13:47.830Z_v9/0/index.zip\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}');
|
||||
INSERT INTO druid_segments (id, dataSource, created_date, start, end, partitioned, version, used, payload) VALUES ('wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z', 'wikipedia', '2013-08-08T21:26:23.799Z', '2013-08-01T00:00:00.000Z', '2013-08-02T00:00:00.000Z', '0', '2013-08-08T21:22:48.989Z', '1', '{\"dataSource\":\"wikipedia\",\"interval\":\"2013-08-01T00:00:00.000Z/2013-08-02T00:00:00.000Z\",\"version\":\"2013-08-08T21:22:48.989Z\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia/20130801T000000.000Z_20130802T000000.000Z/2013-08-08T21_22_48.989Z/0/index.zip\"},\"dimensions\":\"dma_code,continent_code,geo,area_code,robot,country_name,network,city,namespace,anonymous,unpatrolled,page,postal_code,language,newpage,user,region_lookup\",\"metrics\":\"count,delta,variation,added,deleted\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":24664730,\"identifier\":\"wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z\"}');
|
||||
INSERT INTO druid_tasks (id, created_date, datasource, payload, status_payload, active) VALUES ('index_auth_test_2030-04-30T01:13:31.893Z', '2030-04-30T01:13:31.893Z', 'auth_test', '{\"id\":\"index_auth_test_2030-04-30T01:13:31.893Z\",\"created_date\":\"2030-04-30T01:13:31.893Z\",\"datasource\":\"auth_test\",\"active\":0}', '{\"id\":\"index_auth_test_2030-04-30T01:13:31.893Z\",\"status\":\"SUCCESS\",\"duration\":1}', 0);
|
||||
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9','auth_test','2013-03-15T20:49:52.348Z','2012-12-29T00:00:00.000Z','2013-01-10T08:00:00.000Z',0,'2013-01-10T08:13:47.830Z_v9',1,'{\"dataSource\":\"auth_test\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia_editstream/2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z/2013-01-10T08:13:47.830Z_v9/0/index.zip\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}');
|
integration-tests/docker/test-data/security-sample-data.sql (new file, 17 lines)
@ -0,0 +1,17 @@
|
||||
-- Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
-- contributor license agreements. See the NOTICE file distributed with
|
||||
-- this work for additional information regarding copyright ownership.
|
||||
-- The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
-- (the "License"); you may not use this file except in compliance with
|
||||
-- the License. You may obtain a copy of the License at
|
||||
--
|
||||
-- http://www.apache.org/licenses/LICENSE-2.0
|
||||
--
|
||||
-- Unless required by applicable law or agreed to in writing, software
|
||||
-- distributed under the License is distributed on an "AS IS" BASIS,
|
||||
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
-- See the License for the specific language governing permissions and
|
||||
-- limitations under the License.
|
||||
|
||||
INSERT INTO druid_tasks (id, created_date, datasource, payload, status_payload, active) VALUES ('index_auth_test_2030-04-30T01:13:31.893Z', '2030-04-30T01:13:31.893Z', 'auth_test', '{\"id\":\"index_auth_test_2030-04-30T01:13:31.893Z\",\"created_date\":\"2030-04-30T01:13:31.893Z\",\"datasource\":\"auth_test\",\"active\":0}', '{\"id\":\"index_auth_test_2030-04-30T01:13:31.893Z\",\"status\":\"SUCCESS\",\"duration\":1}', 0);
|
||||
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9','auth_test','2013-03-15T20:49:52.348Z','2012-12-29T00:00:00.000Z','2013-01-10T08:00:00.000Z',0,'2013-01-10T08:13:47.830Z_v9',1,'{\"dataSource\":\"auth_test\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia_editstream/2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z/2013-01-10T08:13:47.830Z_v9/0/index.zip\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}');
|
@ -43,6 +43,18 @@
|
||||
<version>${project.parent.version}</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.druid.extensions</groupId>
|
||||
<artifactId>druid-azure-extensions</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.druid.extensions</groupId>
|
||||
<artifactId>druid-google-extensions</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.druid.extensions</groupId>
|
||||
<artifactId>druid-datasketches</artifactId>
|
||||
@ -268,6 +280,7 @@
|
||||
<properties>
|
||||
<start.hadoop.docker>false</start.hadoop.docker>
|
||||
<override.config.path></override.config.path>
|
||||
<resource.file.dir.path></resource.file.dir.path>
|
||||
</properties>
|
||||
<build>
|
||||
<plugins>
|
||||
@ -287,6 +300,7 @@
|
||||
<DRUID_INTEGRATION_TEST_JVM_RUNTIME>${jvm.runtime}</DRUID_INTEGRATION_TEST_JVM_RUNTIME>
|
||||
<DRUID_INTEGRATION_TEST_GROUP>${groups}</DRUID_INTEGRATION_TEST_GROUP>
|
||||
<DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH>${override.config.path}</DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH>
|
||||
<DRUID_INTEGRATION_TEST_RESOURCE_FILE_DIR_PATH>${resource.file.dir.path}</DRUID_INTEGRATION_TEST_RESOURCE_FILE_DIR_PATH>
|
||||
</environmentVariables>
|
||||
<executable>${project.basedir}/run_cluster.sh</executable>
|
||||
</configuration>
|
||||
|
@ -50,12 +50,25 @@
|
||||
mkdir -p $SHARED_DIR/hadoop-dependencies
|
||||
mkdir -p $SHARED_DIR/logs
|
||||
mkdir -p $SHARED_DIR/tasklogs
|
||||
mkdir -p $SHARED_DIR/docker/extensions
|
||||
mkdir -p $SHARED_DIR/docker/credentials
|
||||
|
||||
# install druid jars
|
||||
rm -rf $SHARED_DIR/docker
|
||||
cp -R docker $SHARED_DIR/docker
|
||||
mvn -B dependency:copy-dependencies -DoutputDirectory=$SHARED_DIR/docker/lib
|
||||
|
||||
# move extensions into a separate extension folder
|
||||
# For druid-s3-extensions
|
||||
mkdir -p $SHARED_DIR/docker/extensions/druid-s3-extensions
|
||||
mv $SHARED_DIR/docker/lib/druid-s3-extensions-* $SHARED_DIR/docker/extensions/druid-s3-extensions
|
||||
# For druid-azure-extensions
|
||||
mkdir -p $SHARED_DIR/docker/extensions/druid-azure-extensions
|
||||
mv $SHARED_DIR/docker/lib/druid-azure-extensions-* $SHARED_DIR/docker/extensions/druid-azure-extensions
|
||||
# For druid-google-extensions
|
||||
mkdir -p $SHARED_DIR/docker/extensions/druid-google-extensions
|
||||
mv $SHARED_DIR/docker/lib/druid-google-extensions-* $SHARED_DIR/docker/extensions/druid-google-extensions
|
||||
|
||||
# Pull Hadoop dependency if needed
|
||||
if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]
|
||||
then
|
||||
@ -73,8 +86,14 @@
|
||||
cp ../examples/quickstart/tutorial/wikiticker-2015-09-12-sampled.json.gz $SHARED_DIR/wikiticker-it/wikiticker-2015-09-12-sampled.json.gz
|
||||
cp docker/wiki-simple-lookup.json $SHARED_DIR/wikiticker-it/wiki-simple-lookup.json
|
||||
|
||||
# copy other files if needed
|
||||
if [ -n "$DRUID_INTEGRATION_TEST_RESOURCE_FILE_DIR_PATH" ]
|
||||
then
|
||||
cp -a $DRUID_INTEGRATION_TEST_RESOURCE_FILE_DIR_PATH/. $SHARED_DIR/docker/credentials/
|
||||
fi
|
||||
|
||||
# set up all environment variables to be passed to the containers
|
||||
COMMON_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/common"
|
||||
COMMON_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/common -e DRUID_INTEGRATION_TEST_GROUP"
|
||||
BROKER_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/broker"
|
||||
COORDINATOR_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/coordinator"
|
||||
HISTORICAL_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/historical"
|
||||
@ -89,6 +108,12 @@
|
||||
if [ -z "$DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH" ]
|
||||
then
|
||||
echo "\$DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH is not set. No override config file provided"
|
||||
if [ "$DRUID_INTEGRATION_TEST_GROUP" = "s3-deep-storage" ] || \
|
||||
[ "$DRUID_INTEGRATION_TEST_GROUP" = "gcs-deep-storage" ] || \
|
||||
[ "$DRUID_INTEGRATION_TEST_GROUP" = "azure-deep-storage" ]; then
|
||||
echo "Test group $DRUID_INTEGRATION_TEST_GROUP requires override config file. Stopping test..."
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "\$DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH is set with value ${DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH}"
|
||||
OVERRIDE_ENV="--env-file=$DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH"
|
||||
|
@ -55,6 +55,8 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
|
||||
private Map<String, String> props = null;
|
||||
private String username;
|
||||
private String password;
|
||||
private String cloudBucket;
|
||||
private String cloudPath;
|
||||
|
||||
@JsonCreator
|
||||
ConfigFileConfigProvider(@JsonProperty("configFile") String configFile)
|
||||
@ -188,6 +190,9 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
|
||||
|
||||
password = props.get("password");
|
||||
|
||||
cloudBucket = props.get("cloud_bucket");
|
||||
cloudPath = props.get("cloud_path");
|
||||
|
||||
LOG.info("router: [%s], [%s]", routerUrl, routerTLSUrl);
|
||||
LOG.info("broker: [%s], [%s]", brokerUrl, brokerTLSUrl);
|
||||
LOG.info("historical: [%s], [%s]", historicalUrl, historicalTLSUrl);
|
||||
@ -337,6 +342,18 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
|
||||
return password;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCloudBucket()
|
||||
{
|
||||
return cloudBucket;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCloudPath()
|
||||
{
|
||||
return cloudPath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, String> getProperties()
|
||||
{
|
||||
|
@ -40,6 +40,12 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
|
||||
@JsonProperty
|
||||
private String extraDatasourceNameSuffix = "";
|
||||
|
||||
@JsonProperty
|
||||
private String cloudPath;
|
||||
|
||||
@JsonProperty
|
||||
private String cloudBucket;
|
||||
|
||||
@Override
|
||||
public IntegrationTestingConfig get()
|
||||
{
|
||||
@ -211,6 +217,18 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
|
||||
{
|
||||
return extraDatasourceNameSuffix;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCloudBucket()
|
||||
{
|
||||
return cloudBucket;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCloudPath()
|
||||
{
|
||||
return cloudPath;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
@ -84,4 +84,8 @@ public interface IntegrationTestingConfig
|
||||
boolean manageKafkaTopic();
|
||||
|
||||
String getExtraDatasourceNameSuffix();
|
||||
|
||||
String getCloudBucket();
|
||||
|
||||
String getCloudPath();
|
||||
}
|
||||
|
@ -30,7 +30,23 @@ public class TestNGGroup
|
||||
public static final String KAFKA_INDEX = "kafka-index";
|
||||
public static final String OTHER_INDEX = "other-index";
|
||||
public static final String PERFECT_ROLLUP_PARALLEL_BATCH_INDEX = "perfect-rollup-parallel-batch-index";
|
||||
// This group can only be run individually using -Dgroups=query since it requires specific test data setup.
|
||||
public static final String QUERY = "query";
|
||||
public static final String REALTIME_INDEX = "realtime-index";
|
||||
// This group can only be run individually using -Dgroups=security since it requires specific test data setup.
|
||||
public static final String SECURITY = "security";
|
||||
// This group is not part of CI. To run this group, s3 configs/credentials for your s3 must be provided in a file.
|
||||
// The path of the file must then be passed to mvn with -Doverride.config.path=<PATH_TO_FILE>
|
||||
// See integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
|
||||
public static final String S3_DEEP_STORAGE = "s3-deep-storage";
|
||||
// This group is not part of CI. To run this group, gcs configs/credentials for your gcs must be provided in a file.
|
||||
// The path of the file must then be passed to mvn with -Doverride.config.path=<PATH_TO_FILE>
|
||||
// See integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide.
|
||||
// The path to the folder that contains your GOOGLE_APPLICATION_CREDENTIALS file must also be passed
|
||||
// to mvn with -Dresource.file.dir.path=<PATH_TO_FOLDER>
|
||||
public static final String GCS_DEEP_STORAGE = "gcs-deep-storage";
|
||||
// This group is not part of CI. To run this group, azure configs/credentials for your azure must be provided in a file.
|
||||
// The path of the file must then be passed to mvn with -Doverride.config.path=<PATH_TO_FILE>
|
||||
// See integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
|
||||
public static final String AZURE_DEEP_STORAGE = "azure-deep-storage";
|
||||
}
|
||||
|
@ -0,0 +1,145 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index to your Azure storage at the location set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials/configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
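* For example (illustrative): mvn verify -P integration-tests -Dgroups=azure-deep-storage -Doverride.config.path=<PATH_TO_FILE>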
|
||||
*/
|
||||
@Test(groups = TestNGGroup.AZURE_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITAzureParallelIndexTest extends AbstractITBatchIndexTest
|
||||
{
|
||||
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
|
||||
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
|
||||
private static final String INPUT_SOURCE_URIS_KEY = "uris";
|
||||
private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
|
||||
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
|
||||
private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
|
||||
private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
|
||||
private static final String WIKIPEDIA_DATA_3 = "wikipedia_index_data3.json";
|
||||
|
||||
@DataProvider
|
||||
public static Object[][] resources()
|
||||
{
|
||||
return new Object[][]{
|
||||
{new Pair<>(INPUT_SOURCE_URIS_KEY,
|
||||
ImmutableList.of(
|
||||
"azure://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_1,
|
||||
"azure://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_2,
|
||||
"azure://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_3
|
||||
)
|
||||
)},
|
||||
{new Pair<>(INPUT_SOURCE_PREFIXES_KEY,
|
||||
ImmutableList.of(
|
||||
"azure://%%BUCKET%%/%%PATH%%"
|
||||
)
|
||||
)},
|
||||
{new Pair<>(INPUT_SOURCE_OBJECTS_KEY,
|
||||
ImmutableList.of(
|
||||
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_1),
|
||||
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_2),
|
||||
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_3)
|
||||
)
|
||||
)}
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "resources")
|
||||
public void testAzureIndexData(Pair<String, List> azureInputSource) throws Exception
|
||||
{
|
||||
try (
|
||||
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
|
||||
) {
|
||||
final Function<String, String> azurePropsTransform = spec -> {
|
||||
try {
|
||||
String inputSourceValue = jsonMapper.writeValueAsString(azureInputSource.rhs);
|
||||
inputSourceValue = StringUtils.replace(
|
||||
inputSourceValue,
|
||||
"%%BUCKET%%",
|
||||
config.getCloudBucket()
|
||||
);
|
||||
inputSourceValue = StringUtils.replace(
|
||||
inputSourceValue,
|
||||
"%%PATH%%",
|
||||
config.getCloudPath()
|
||||
);
|
||||
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%PARTITIONS_SPEC%%",
|
||||
jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null, null))
|
||||
);
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_SOURCE_TYPE%%",
|
||||
"azure"
|
||||
);
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_SOURCE_PROPERTY_KEY%%",
|
||||
azureInputSource.lhs
|
||||
);
|
||||
return StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_SOURCE_PROPERTY_VALUE%%",
|
||||
inputSourceValue
|
||||
);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
};
|
||||
|
||||
doIndexTest(
|
||||
INDEX_DATASOURCE,
|
||||
INDEX_TASK,
|
||||
azurePropsTransform,
|
||||
INDEX_QUERIES_RESOURCE,
|
||||
false,
|
||||
true,
|
||||
true
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,146 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index to your GCS bucket at the location set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide.
|
||||
* 4) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file
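* For example (illustrative): mvn verify -P integration-tests -Dgroups=gcs-deep-storage -Doverride.config.path=<PATH_TO_FILE> -Dresource.file.dir.path=<PATH_TO_FOLDER>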
|
||||
*/
|
||||
@Test(groups = TestNGGroup.GCS_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITGcsParallelIndexTest extends AbstractITBatchIndexTest
|
||||
{
|
||||
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
|
||||
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
|
||||
private static final String INPUT_SOURCE_URIS_KEY = "uris";
|
||||
private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
|
||||
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
|
||||
private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
|
||||
private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
|
||||
private static final String WIKIPEDIA_DATA_3 = "wikipedia_index_data3.json";
|
||||
|
||||
@DataProvider
|
||||
public static Object[][] resources()
|
||||
{
|
||||
return new Object[][]{
|
||||
{new Pair<>(INPUT_SOURCE_URIS_KEY,
|
||||
ImmutableList.of(
|
||||
"gs://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_1,
|
||||
"gs://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_2,
|
||||
"gs://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_3
|
||||
)
|
||||
)},
|
||||
{new Pair<>(INPUT_SOURCE_PREFIXES_KEY,
|
||||
ImmutableList.of(
|
||||
"gs://%%BUCKET%%/%%PATH%%"
|
||||
)
|
||||
)},
|
||||
{new Pair<>(INPUT_SOURCE_OBJECTS_KEY,
|
||||
ImmutableList.of(
|
||||
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_1),
|
||||
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_2),
|
||||
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_3)
|
||||
)
|
||||
)}
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "resources")
|
||||
public void testGcsIndexData(Pair<String, List> gcsInputSource) throws Exception
|
||||
{
|
||||
try (
|
||||
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
|
||||
) {
|
||||
final Function<String, String> gcsPropsTransform = spec -> {
|
||||
try {
|
||||
String inputSourceValue = jsonMapper.writeValueAsString(gcsInputSource.rhs);
|
||||
inputSourceValue = StringUtils.replace(
|
||||
inputSourceValue,
|
||||
"%%BUCKET%%",
|
||||
config.getCloudBucket()
|
||||
);
|
||||
inputSourceValue = StringUtils.replace(
|
||||
inputSourceValue,
|
||||
"%%PATH%%",
|
||||
config.getCloudPath()
|
||||
);
|
||||
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%PARTITIONS_SPEC%%",
|
||||
jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null, null))
|
||||
);
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_SOURCE_TYPE%%",
|
||||
"google"
|
||||
);
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_SOURCE_PROPERTY_KEY%%",
|
||||
gcsInputSource.lhs
|
||||
);
|
||||
return StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_SOURCE_PROPERTY_VALUE%%",
|
||||
inputSourceValue
|
||||
);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
};
|
||||
|
||||
doIndexTest(
|
||||
INDEX_DATASOURCE,
|
||||
INDEX_TASK,
|
||||
gcsPropsTransform,
|
||||
INDEX_QUERIES_RESOURCE,
|
||||
false,
|
||||
true,
|
||||
true
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,145 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index to your S3 bucket at the location set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials/configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
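* For example (illustrative): mvn verify -P integration-tests -Dgroups=s3-deep-storage -Doverride.config.path=<PATH_TO_FILE>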
|
||||
*/
|
||||
@Test(groups = TestNGGroup.S3_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITS3ParallelIndexTest extends AbstractITBatchIndexTest
|
||||
{
|
||||
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
|
||||
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
|
||||
private static final String INPUT_SOURCE_URIS_KEY = "uris";
|
||||
private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
|
||||
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
|
||||
private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
|
||||
private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
|
||||
private static final String WIKIPEDIA_DATA_3 = "wikipedia_index_data3.json";
|
||||
|
||||
@DataProvider
|
||||
public static Object[][] resources()
|
||||
{
|
||||
return new Object[][]{
|
||||
{new Pair<>(INPUT_SOURCE_URIS_KEY,
|
||||
ImmutableList.of(
|
||||
"s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_1,
|
||||
"s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_2,
|
||||
"s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_3
|
||||
)
|
||||
)},
|
||||
{new Pair<>(INPUT_SOURCE_PREFIXES_KEY,
|
||||
ImmutableList.of(
|
||||
"s3://%%BUCKET%%/%%PATH%%"
|
||||
)
|
||||
)},
|
||||
{new Pair<>(INPUT_SOURCE_OBJECTS_KEY,
|
||||
ImmutableList.of(
|
||||
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_1),
|
||||
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_2),
|
||||
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_3)
|
||||
)
|
||||
)}
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "resources")
|
||||
public void testS3IndexData(Pair<String, List> s3InputSource) throws Exception
|
||||
{
|
||||
try (
|
||||
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
|
||||
) {
|
||||
final Function<String, String> s3PropsTransform = spec -> {
|
||||
try {
|
||||
String inputSourceValue = jsonMapper.writeValueAsString(s3InputSource.rhs);
|
||||
inputSourceValue = StringUtils.replace(
|
||||
inputSourceValue,
|
||||
"%%BUCKET%%",
|
||||
config.getCloudBucket()
|
||||
);
|
||||
inputSourceValue = StringUtils.replace(
|
||||
inputSourceValue,
|
||||
"%%PATH%%",
|
||||
config.getCloudPath()
|
||||
);
|
||||
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%PARTITIONS_SPEC%%",
|
||||
jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null, null))
|
||||
);
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_SOURCE_TYPE%%",
|
||||
"s3"
|
||||
);
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_SOURCE_PROPERTY_KEY%%",
|
||||
s3InputSource.lhs
|
||||
);
|
||||
return StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_SOURCE_PROPERTY_VALUE%%",
|
||||
inputSourceValue
|
||||
);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
};
|
||||
|
||||
doIndexTest(
|
||||
INDEX_DATASOURCE,
|
||||
INDEX_TASK,
|
||||
s3PropsTransform,
|
||||
INDEX_QUERIES_RESOURCE,
|
||||
false,
|
||||
true,
|
||||
true
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,87 @@
|
||||
{
|
||||
"type": "index_parallel",
|
||||
"spec": {
|
||||
"dataSchema": {
|
||||
"dataSource": "%%DATASOURCE%%",
|
||||
"timestampSpec": {
|
||||
"column": "timestamp"
|
||||
},
|
||||
"dimensionsSpec": {
|
||||
"dimensions": [
|
||||
"page",
|
||||
{"type": "string", "name": "language", "createBitmapIndex": false},
|
||||
"user",
|
||||
"unpatrolled",
|
||||
"newPage",
|
||||
"robot",
|
||||
"anonymous",
|
||||
"namespace",
|
||||
"continent",
|
||||
"country",
|
||||
"region",
|
||||
"city"
|
||||
]
|
||||
},
|
||||
"metricsSpec": [
|
||||
{
|
||||
"type": "count",
|
||||
"name": "count"
|
||||
},
|
||||
{
|
||||
"type": "doubleSum",
|
||||
"name": "added",
|
||||
"fieldName": "added"
|
||||
},
|
||||
{
|
||||
"type": "doubleSum",
|
||||
"name": "deleted",
|
||||
"fieldName": "deleted"
|
||||
},
|
||||
{
|
||||
"type": "doubleSum",
|
||||
"name": "delta",
|
||||
"fieldName": "delta"
|
||||
},
|
||||
{
|
||||
"name": "thetaSketch",
|
||||
"type": "thetaSketch",
|
||||
"fieldName": "user"
|
||||
},
|
||||
{
|
||||
"name": "quantilesDoublesSketch",
|
||||
"type": "quantilesDoublesSketch",
|
||||
"fieldName": "delta"
|
||||
},
|
||||
{
|
||||
"name": "HLLSketchBuild",
|
||||
"type": "HLLSketchBuild",
|
||||
"fieldName": "user"
|
||||
}
|
||||
],
|
||||
"granularitySpec": {
|
||||
"segmentGranularity": "DAY",
|
||||
"queryGranularity": "second",
|
||||
"intervals" : [ "2013-08-31/2013-09-02" ]
|
||||
}
|
||||
},
|
||||
"ioConfig": {
|
||||
"type": "index_parallel",
|
||||
"inputSource": {
|
||||
"type": "%%INPUT_SOURCE_TYPE%%",
|
||||
"%%INPUT_SOURCE_PROPERTY_KEY%%": %%INPUT_SOURCE_PROPERTY_VALUE%%
|
||||
},
|
||||
"inputFormat": {
|
||||
"type": "json"
|
||||
}
|
||||
},
|
||||
"tuningConfig": {
|
||||
"type": "index_parallel",
|
||||
"maxNumConcurrentSubTasks": 10,
|
||||
"partitionsSpec": %%PARTITIONS_SPEC%%,
|
||||
"splitHintSpec": {
|
||||
"type": "maxSize",
|
||||
"maxSplitSize": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|