Adding s3, gcs, azure integration tests (#9501)

* exclude pulling s3 segments for tests that don't need it

* fix script

* fix script

* fix script

* add s3 test

* refactor sample data script

* add tests

* add tests

* add license header

* fix failing tests

* change bucket and path to config

* update integration test readme

* fix typo
Maytas Monsereenusorn 2020-03-17 03:08:44 -07:00 committed by GitHub
parent b1847364b0
commit 4c620b8f1c
23 changed files with 811 additions and 25 deletions


@ -324,6 +324,14 @@ jobs:
script: *run_integration_test
after_failure: *integration_test_diags
- &integration_security
name: "(Compile=openjdk8, Run=openjdk8) security integration test"
jdk: openjdk8
services: *integration_test_services
env: TESTNG_GROUPS='-Dgroups=security' JVM_RUNTIME='-Djvm.runtime=8'
script: *run_integration_test
after_failure: *integration_test_diags
- &integration_realtime_index
name: "(Compile=openjdk8, Run=openjdk8) realtime index integration test"
jdk: openjdk8
@ -336,7 +344,7 @@ jobs:
name: "(Compile=openjdk8, Run=openjdk8) other integration test"
jdk: openjdk8
services: *integration_test_services
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index' JVM_RUNTIME='-Djvm.runtime=8'
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage' JVM_RUNTIME='-Djvm.runtime=8'
script: *run_integration_test
after_failure: *integration_test_diags
# END - Integration tests for Compile with Java 8 and Run with Java 8
@ -362,6 +370,11 @@ jobs:
jdk: openjdk8
env: TESTNG_GROUPS='-Dgroups=query' JVM_RUNTIME='-Djvm.runtime=11'
- <<: *integration_security
name: "(Compile=openjdk8, Run=openjdk11) security integration test"
jdk: openjdk8
env: TESTNG_GROUPS='-Dgroups=security' JVM_RUNTIME='-Djvm.runtime=11'
- <<: *integration_realtime_index
name: "(Compile=openjdk8, Run=openjdk11) realtime index integration test"
jdk: openjdk8
@ -370,9 +383,9 @@ jobs:
- <<: *integration_tests
name: "(Compile=openjdk8, Run=openjdk11) other integration test"
jdk: openjdk8
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index' JVM_RUNTIME='-Djvm.runtime=11'
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage' JVM_RUNTIME='-Djvm.runtime=11'
# END - Integration tests for Compile with Java 8 and Run with Java 11
- name: "security vulnerabilities"
stage: cron
install: skip


@ -48,9 +48,10 @@ export DOCKER_IP=127.0.0.1
## Running tests
To run all the tests using docker and mvn run the following command:
To run all tests from a test group using docker and mvn run the following command:
(list of test groups can be found at integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java)
```
mvn verify -P integration-tests
mvn verify -P integration-tests -Dgroups=<test_group>
```
To run only a single test using mvn run the following command:
@ -61,6 +62,12 @@ To run only a single test using mvn run the following command:
Add `-rf :druid-integration-tests` when running integration tests for the second time or later without changing
the code of core modules in between to skip up-to-date checks for the whole module dependency tree.
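For example, a repeat run of the batch index group might look like this (the group name is illustrative):
```
mvn verify -P integration-tests -Dgroups=batch-index -rf :druid-integration-tests
```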
Integration tests can also be run with either Java 8 or Java 11 by adding -Djvm.runtime=# to the mvn command, where #
can be either 8 or 11.
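For example, to run a test group on Java 11 (the group name is illustrative):
```
mvn verify -P integration-tests -Dgroups=query -Djvm.runtime=11
```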
Druid's configuration (using Docker) can be overridden by providing -Doverride.config.path=<PATH_TO_FILE>.
The file must contain one property per line; each key must start with druid_ and be in snake case.
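For example, a minimal override file and the command that uses it might look like this (a sketch; the property names are illustrative, taken from the docker environment configs):
```
cat > /tmp/druid-it-overrides.conf <<'EOF'
druid_processing_numThreads=2
druid_query_groupBy_maxOnDiskStorage=300000000
EOF
mvn verify -P integration-tests -Dgroups=<test_group> -Doverride.config.path=/tmp/druid-it-overrides.conf
```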
Running Tests Using A Configuration File for Any Cluster
-------------------
@ -79,6 +86,8 @@ To run tests on any druid cluster that is already running, create a configuratio
"coordinator_port": "<coordinator_port>",
"middlemanager_host": "<middle_manager_ip>",
"zookeeper_hosts": "<comma-separated list of zookeeper_ip:zookeeper_port>",
"cloud_bucket": "<(optional) cloud_bucket for test data if running cloud integration test>",
"cloud_path": "<(optional) cloud_path for test data if running cloud integration test>",
}
Set the environment variable CONFIG_FILE to the name of the configuration file:
@ -86,9 +95,10 @@ Set the environment variable CONFIG_FILE to the name of the configuration file:
export CONFIG_FILE=<config file name>
```
To run all the tests using mvn run the following command:
To run all tests from a test group using mvn run the following command:
(list of test groups can be found at integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java)
```
mvn verify -P int-tests-config-file
mvn verify -P int-tests-config-file -Dgroups=<test_group>
```
To run only a single test using mvn run the following command:
@ -96,6 +106,34 @@ To run only a single test using mvn run the following command:
mvn verify -P int-tests-config-file -Dit.test=<test_name>
```
Running a Test That Uses Cloud
-------------------
The integration tests that index from cloud storage or use cloud storage as deep storage are not run as part
of the integration test run discussed above. Running these tests requires you to provide
your own cloud storage.
Currently, the integration tests support Google Cloud Storage, Amazon S3, and Microsoft Azure.
These can be run by passing "gcs-deep-storage", "s3-deep-storage", or "azure-deep-storage"
to -Dgroups for Google Cloud Storage, Amazon S3, and Microsoft Azure respectively. Note that only
one group should be run per mvn command.
In addition to specifying -Dgroups on the mvn command, the following will need to be provided:
1) Set the bucket and path for your test data. This can be done by setting -Ddruid.test.config.cloudBucket and
-Ddruid.test.config.cloudPath in the mvn command or setting "cloud_bucket" and "cloud_path" in the config file.
2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
located in integration-tests/src/test/resources/data/batch_index to your Cloud storage at the location set in step 1.
3) Provide -Doverride.config.path=<PATH_TO_FILE> with your cloud credentials/configs set. See the
integration-tests/docker/environment-configs/override-examples/ directory for the env vars to provide for each cloud storage.
For Google Cloud Storage, in addition to the above, you will also have to:
1) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> pointing to the folder that contains your GOOGLE_APPLICATION_CREDENTIALS file
For example, to run the integration tests for Google Cloud Storage:
```
mvn verify -P integration-tests -Dgroups=gcs-deep-storage -Doverride.config.path=<PATH_TO_FILE> -Dresource.file.dir.path=<PATH_TO_FOLDER> -Ddruid.test.config.cloudBucket=test-bucket -Ddruid.test.config.cloudPath=test-data-folder/
```
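A comparable S3 run might look like this (the bucket and path values are placeholders):
```
mvn verify -P integration-tests -Dgroups=s3-deep-storage -Doverride.config.path=<PATH_TO_FILE> -Ddruid.test.config.cloudBucket=test-bucket -Ddruid.test.config.cloudPath=test-data-folder/
```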
Running a Test That Uses Hadoop
-------------------


@ -46,11 +46,7 @@ RUN ln -sf /usr/share/java/mysql-connector-java.jar /usr/local/druid/lib/mysql-c
RUN find /var/lib/mysql -type f -exec touch {} \; && service mysql start \
&& java -cp "/usr/local/druid/lib/*" -Ddruid.metadata.storage.type=mysql org.apache.druid.cli.Main tools metadata-init --connectURI="jdbc:mysql://localhost:3306/druid" --user=druid --password=diurd \
&& /etc/init.d/mysql stop
ADD sample-data.sql sample-data.sql
# touch is needed because OverlayFS's copy-up operation breaks POSIX standards. See https://github.com/docker/for-linux/issues/72.
RUN find /var/lib/mysql -type f -exec touch {} \; && service mysql start \
&& cat sample-data.sql | mysql -u root druid \
&& /etc/init.d/mysql stop
ADD test-data /test-data
# Setup supervisord
ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf
@ -101,6 +97,8 @@ WORKDIR /var/lib/druid
ENTRYPOINT /tls/generate-server-certs-and-keystores.sh \
# Create druid service config files with all the config variables
&& . /druid.sh; setupConfig \
# Some test groups require pre-existing data to be setup
&& . /druid.sh; setupData \
# Export the service config file path to use in supervisord conf file
&& export DRUID_COMMON_CONF_DIR="$(. /druid.sh; getConfPath ${DRUID_SERVICE})" \
# Export the common config file path to use in supervisord conf file


@ -17,7 +17,8 @@
set -e
getConfPath() {
getConfPath()
{
cluster_conf_base=/tmp/conf/druid/cluster
case "$1" in
_common) echo $cluster_conf_base/_common ;;
@ -31,7 +32,8 @@ getConfPath() {
}
# Delete the old key (if existing) and append new key=value
setKey() {
setKey()
{
service="$1"
key="$2"
value="$3"
@ -45,7 +47,8 @@ setKey() {
echo "Setting $key=$value in $service_conf"
}
setupConfig() {
setupConfig()
{
echo "$(date -Is) configuring service $DRUID_SERVICE"
# We put all the config in /tmp/conf to allow for a
@ -63,7 +66,6 @@ setupConfig() {
setKey $DRUID_SERVICE druid.host $(resolveip -s $HOSTNAME)
setKey $DRUID_SERVICE druid.worker.ip $(resolveip -s $HOSTNAME)
# Write out all the environment variables starting with druid_ to druid service config file
# This will replace _ with . in the key
env | grep ^druid_ | while read evar;
@ -73,4 +75,23 @@ setupConfig() {
var=$(echo "$evar" | sed -e 's?^\([^=]*\)=.*?\1?g' -e 's?_?.?g')
setKey $DRUID_SERVICE "$var" "$val"
done
}
}
setupData()
{
# The "query" and "security" test groups require data to be setup before running the tests.
# In particular, they requires segments to be download from a pre-existing s3 bucket.
# This is done by using the loadSpec put into metadatastore and s3 credientials set below.
if [ "$DRUID_INTEGRATION_TEST_GROUP" = "query" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "security" ]; then
# touch is needed because OverlayFS's copy-up operation breaks POSIX standards. See https://github.com/docker/for-linux/issues/72.
find /var/lib/mysql -type f -exec touch {} \; && service mysql start \
&& cat /test-data/${DRUID_INTEGRATION_TEST_GROUP}-sample-data.sql | mysql -u root druid && /etc/init.d/mysql stop
# The s3 credentials below are needed to access the pre-existing s3 bucket
setKey $DRUID_SERVICE druid.s3.accessKey AKIAJI7DG7CDECGBQ6NA
setKey $DRUID_SERVICE druid.s3.secretKey OBaLISDFjKLajSTrJ53JoTtzTZLjPlRePcwa+Pjv
setKey $DRUID_SERVICE druid.extensions.loadList [\"druid-s3-extensions\"]
# The region of the sample data s3 blobs needed for these test groups
export AWS_REGION=us-east-1
fi
}
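# For example, when DRUID_INTEGRATION_TEST_GROUP=query the block above loads
# /test-data/query-sample-data.sql into the metadata store before the Druid services start.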


@ -26,6 +26,8 @@ COMMON_DRUID_JAVA_OPTS=-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Dlog4j.configu
DRUID_DEP_LIB_DIR=/shared/hadoop_xml/*:/shared/docker/lib/*:/usr/local/druid/lib/mysql-connector-java.jar
# Druid configs
druid_extensions_loadList=[]
druid_extensions_directory=/shared/docker/extensions
druid_auth_authenticator_basic_authorizerName=basic
druid_auth_authenticator_basic_initialAdminPassword=priest
druid_auth_authenticator_basic_initialInternalClientPassword=warlock


@ -24,8 +24,6 @@ DRUID_LOG_PATH=/shared/logs/historical.log
SERVICE_DRUID_JAVA_OPTS=-server -Xmx512m -Xms512m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:+UseG1GC
# Druid configs
druid_s3_accessKey=AKIAJI7DG7CDECGBQ6NA
druid_s3_secretKey=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
druid_processing_buffer_sizeBytes=25000000
druid_processing_numThreads=2
druid_query_groupBy_maxOnDiskStorage=300000000


@ -32,8 +32,6 @@ druid_indexer_runner_javaOptsArray=["-server", "-Xmx256m", "-Xms256m", "-XX:NewS
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=25000000
druid_indexer_fork_property_druid_processing_numThreads=1
druid_indexer_fork_server_http_numThreads=20
druid_s3_accessKey=AKIAJI7DG7CDECGBQ6NA
druid_s3_secretKey=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
druid_selectors_indexing_serviceName=druid/overlord
druid_indexer_task_chathandler_type=announce
druid_auth_basic_common_cacheDirectory=/tmp/authCache/middleManager


@ -0,0 +1,28 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# Example of override config file to provide.
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
#
druid_storage_type=azure
druid_azure_account=<OVERRIDE_THIS>
druid_azure_key=<OVERRIDE_THIS>
druid_azure_container=<OVERRIDE_THIS>
druid_extensions_loadList=["druid-azure-extensions"]


@ -0,0 +1,29 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# Example of override config file to provide.
# Please replace <OVERRIDE_THIS> and <YOUR_GOOGLE_CREDENTIALS_FILE_NAME> with your cloud configs/credentials
#
druid_storage_type=google
druid_google_bucket=<OVERRIDE_THIS>
druid_google_prefix=<OVERRIDE_THIS>
druid_extensions_loadList=["druid-google-extensions"]
GOOGLE_APPLICATION_CREDENTIALS=/shared/docker/credentials/<YOUR_GOOGLE_CREDENTIALS_FILE_NAME>


@ -0,0 +1,30 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# Example of override config file to provide.
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
#
druid_storage_type=s3
druid_storage_bucket=<OVERRIDE_THIS>
druid_storage_baseKey=druid/segments
druid_s3_accessKey=<OVERRIDE_THIS>
druid_s3_secretKey=<OVERRIDE_THIS>
AWS_REGION=<OVERRIDE_THIS>
druid_extensions_loadList=["druid-s3-extensions"]


@ -4,4 +4,3 @@ redirect_stderr=true
priority=100
autorestart=false
stdout_logfile=%(ENV_DRUID_LOG_PATH)s
environment=AWS_REGION=us-east-1


@ -18,5 +18,3 @@ INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,ver
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9','twitterstream','2013-05-13T00:03:48.807Z','2013-01-03T00:00:00.000Z','2013-01-04T00:00:00.000Z',0,'2013-01-04T04:09:13.590Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z\",\"version\":\"2013-01-04T04:09:13.590Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z/2013-01-04T04:09:13.590Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":411651320,\"identifier\":\"twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9\"}');
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9','wikipedia_editstream','2013-03-15T20:49:52.348Z','2012-12-29T00:00:00.000Z','2013-01-10T08:00:00.000Z',0,'2013-01-10T08:13:47.830Z_v9',1,'{\"dataSource\":\"wikipedia_editstream\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia_editstream/2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z/2013-01-10T08:13:47.830Z_v9/0/index.zip\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}');
INSERT INTO druid_segments (id, dataSource, created_date, start, end, partitioned, version, used, payload) VALUES ('wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z', 'wikipedia', '2013-08-08T21:26:23.799Z', '2013-08-01T00:00:00.000Z', '2013-08-02T00:00:00.000Z', '0', '2013-08-08T21:22:48.989Z', '1', '{\"dataSource\":\"wikipedia\",\"interval\":\"2013-08-01T00:00:00.000Z/2013-08-02T00:00:00.000Z\",\"version\":\"2013-08-08T21:22:48.989Z\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia/20130801T000000.000Z_20130802T000000.000Z/2013-08-08T21_22_48.989Z/0/index.zip\"},\"dimensions\":\"dma_code,continent_code,geo,area_code,robot,country_name,network,city,namespace,anonymous,unpatrolled,page,postal_code,language,newpage,user,region_lookup\",\"metrics\":\"count,delta,variation,added,deleted\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":24664730,\"identifier\":\"wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z\"}');
INSERT INTO druid_tasks (id, created_date, datasource, payload, status_payload, active) VALUES ('index_auth_test_2030-04-30T01:13:31.893Z', '2030-04-30T01:13:31.893Z', 'auth_test', '{\"id\":\"index_auth_test_2030-04-30T01:13:31.893Z\",\"created_date\":\"2030-04-30T01:13:31.893Z\",\"datasource\":\"auth_test\",\"active\":0}', '{\"id\":\"index_auth_test_2030-04-30T01:13:31.893Z\",\"status\":\"SUCCESS\",\"duration\":1}', 0);
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9','auth_test','2013-03-15T20:49:52.348Z','2012-12-29T00:00:00.000Z','2013-01-10T08:00:00.000Z',0,'2013-01-10T08:13:47.830Z_v9',1,'{\"dataSource\":\"auth_test\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia_editstream/2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z/2013-01-10T08:13:47.830Z_v9/0/index.zip\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}');


@ -0,0 +1,17 @@
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
INSERT INTO druid_tasks (id, created_date, datasource, payload, status_payload, active) VALUES ('index_auth_test_2030-04-30T01:13:31.893Z', '2030-04-30T01:13:31.893Z', 'auth_test', '{\"id\":\"index_auth_test_2030-04-30T01:13:31.893Z\",\"created_date\":\"2030-04-30T01:13:31.893Z\",\"datasource\":\"auth_test\",\"active\":0}', '{\"id\":\"index_auth_test_2030-04-30T01:13:31.893Z\",\"status\":\"SUCCESS\",\"duration\":1}', 0);
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9','auth_test','2013-03-15T20:49:52.348Z','2012-12-29T00:00:00.000Z','2013-01-10T08:00:00.000Z',0,'2013-01-10T08:13:47.830Z_v9',1,'{\"dataSource\":\"auth_test\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia_editstream/2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z/2013-01-10T08:13:47.830Z_v9/0/index.zip\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}');


@ -43,6 +43,18 @@
<version>${project.parent.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.druid.extensions</groupId>
<artifactId>druid-azure-extensions</artifactId>
<version>${project.parent.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.druid.extensions</groupId>
<artifactId>druid-google-extensions</artifactId>
<version>${project.parent.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.druid.extensions</groupId>
<artifactId>druid-datasketches</artifactId>
@ -268,6 +280,7 @@
<properties>
<start.hadoop.docker>false</start.hadoop.docker>
<override.config.path></override.config.path>
<resource.file.dir.path></resource.file.dir.path>
</properties>
<build>
<plugins>
@ -287,6 +300,7 @@
<DRUID_INTEGRATION_TEST_JVM_RUNTIME>${jvm.runtime}</DRUID_INTEGRATION_TEST_JVM_RUNTIME>
<DRUID_INTEGRATION_TEST_GROUP>${groups}</DRUID_INTEGRATION_TEST_GROUP>
<DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH>${override.config.path}</DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH>
<DRUID_INTEGRATION_TEST_RESOURCE_FILE_DIR_PATH>${resource.file.dir.path}</DRUID_INTEGRATION_TEST_RESOURCE_FILE_DIR_PATH>
</environmentVariables>
<executable>${project.basedir}/run_cluster.sh</executable>
</configuration>


@ -50,12 +50,25 @@
mkdir -p $SHARED_DIR/hadoop-dependencies
mkdir -p $SHARED_DIR/logs
mkdir -p $SHARED_DIR/tasklogs
mkdir -p $SHARED_DIR/docker/extensions
mkdir -p $SHARED_DIR/docker/credentials
# install druid jars
rm -rf $SHARED_DIR/docker
cp -R docker $SHARED_DIR/docker
mvn -B dependency:copy-dependencies -DoutputDirectory=$SHARED_DIR/docker/lib
# Move extensions into separate extension folders
# For druid-s3-extensions
mkdir -p $SHARED_DIR/docker/extensions/druid-s3-extensions
mv $SHARED_DIR/docker/lib/druid-s3-extensions-* $SHARED_DIR/docker/extensions/druid-s3-extensions
# For druid-azure-extensions
mkdir -p $SHARED_DIR/docker/extensions/druid-azure-extensions
mv $SHARED_DIR/docker/lib/druid-azure-extensions-* $SHARED_DIR/docker/extensions/druid-azure-extensions
# For druid-google-extensions
mkdir -p $SHARED_DIR/docker/extensions/druid-google-extensions
mv $SHARED_DIR/docker/lib/druid-google-extensions-* $SHARED_DIR/docker/extensions/druid-google-extensions
# Pull Hadoop dependency if needed
if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]
then
@ -73,8 +86,14 @@
cp ../examples/quickstart/tutorial/wikiticker-2015-09-12-sampled.json.gz $SHARED_DIR/wikiticker-it/wikiticker-2015-09-12-sampled.json.gz
cp docker/wiki-simple-lookup.json $SHARED_DIR/wikiticker-it/wiki-simple-lookup.json
# copy other files if needed
if [ -n "$DRUID_INTEGRATION_TEST_RESOURCE_FILE_DIR_PATH" ]
then
cp -a $DRUID_INTEGRATION_TEST_RESOURCE_FILE_DIR_PATH/. $SHARED_DIR/docker/credentials/
fi
# Set up all environment variables to be passed to the containers
COMMON_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/common"
COMMON_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/common -e DRUID_INTEGRATION_TEST_GROUP"
BROKER_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/broker"
COORDINATOR_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/coordinator"
HISTORICAL_ENV="--env-file=$ENVIRONMENT_CONFIGS_DIR/historical"
@ -89,6 +108,12 @@
if [ -z "$DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH" ]
then
echo "\$DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH is not set. No override config file provided"
if [ "$DRUID_INTEGRATION_TEST_GROUP" = "s3-deep-storage" ] || \
[ "$DRUID_INTEGRATION_TEST_GROUP" = "gcs-deep-storage" ] || \
[ "$DRUID_INTEGRATION_TEST_GROUP" = "azure-deep-storage" ]; then
echo "Test group $DRUID_INTEGRATION_TEST_GROUP requires override config file. Stopping test..."
exit 1
fi
else
echo "\$DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH is set with value ${DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH}"
OVERRIDE_ENV="--env-file=$DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH"


@ -55,6 +55,8 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
private Map<String, String> props = null;
private String username;
private String password;
private String cloudBucket;
private String cloudPath;
@JsonCreator
ConfigFileConfigProvider(@JsonProperty("configFile") String configFile)
@ -188,6 +190,9 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
password = props.get("password");
cloudBucket = props.get("cloud_bucket");
cloudPath = props.get("cloud_path");
LOG.info("router: [%s], [%s]", routerUrl, routerTLSUrl);
LOG.info("broker: [%s], [%s]", brokerUrl, brokerTLSUrl);
LOG.info("historical: [%s], [%s]", historicalUrl, historicalTLSUrl);
@ -337,6 +342,18 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
return password;
}
@Override
public String getCloudBucket()
{
return cloudBucket;
}
@Override
public String getCloudPath()
{
return cloudPath;
}
@Override
public Map<String, String> getProperties()
{


@ -40,6 +40,12 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
@JsonProperty
private String extraDatasourceNameSuffix = "";
@JsonProperty
private String cloudPath;
@JsonProperty
private String cloudBucket;
@Override
public IntegrationTestingConfig get()
{
@ -211,6 +217,18 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
{
return extraDatasourceNameSuffix;
}
@Override
public String getCloudBucket()
{
return cloudBucket;
}
@Override
public String getCloudPath()
{
return cloudPath;
}
};
}
}


@ -84,4 +84,8 @@ public interface IntegrationTestingConfig
boolean manageKafkaTopic();
String getExtraDatasourceNameSuffix();
String getCloudBucket();
String getCloudPath();
}


@ -30,7 +30,23 @@ public class TestNGGroup
public static final String KAFKA_INDEX = "kafka-index";
public static final String OTHER_INDEX = "other-index";
public static final String PERFECT_ROLLUP_PARALLEL_BATCH_INDEX = "perfect-rollup-parallel-batch-index";
// This group can only be run individually using -Dgroups=query since it requires specific test data setup.
public static final String QUERY = "query";
public static final String REALTIME_INDEX = "realtime-index";
// This group can only be run individually using -Dgroups=security since it requires specific test data setup.
public static final String SECURITY = "security";
// This group is not part of CI. To run this group, the s3 configs/credentials for your bucket must be provided in a file.
// The path of the file must then be passed to mvn with -Doverride.config.path=<PATH_TO_FILE>.
// See integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
public static final String S3_DEEP_STORAGE = "s3-deep-storage";
// This group is not part of CI. To run this group, the gcs configs/credentials for your bucket must be provided in a file.
// The path of the file must then be passed to mvn with -Doverride.config.path=<PATH_TO_FILE>.
// See integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide.
// The path to the folder that contains your GOOGLE_APPLICATION_CREDENTIALS file must also be passed
// to mvn with -Dresource.file.dir.path=<PATH_TO_FOLDER>
public static final String GCS_DEEP_STORAGE = "gcs-deep-storage";
// This group is not part of CI. To run this group, the azure configs/credentials for your container must be provided in a file.
// The path of the file must then be passed to mvn with -Doverride.config.path=<PATH_TO_FILE>.
// See integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
public static final String AZURE_DEEP_STORAGE = "azure-deep-storage";
}


@ -0,0 +1,145 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.tests.indexer;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.testing.guice.DruidTestModuleFactory;
import org.apache.druid.tests.TestNGGroup;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.io.Closeable;
import java.util.List;
import java.util.UUID;
import java.util.function.Function;
/**
* IMPORTANT:
* To run this test, you must:
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your Azure storage at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials/configs set. See
* integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
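* For example (a sketch; replace the placeholders with your own values):
* mvn verify -P integration-tests -Dgroups=azure-deep-storage -Doverride.config.path=<PATH_TO_FILE> \
*   -Ddruid.test.config.cloudBucket=<BUCKET> -Ddruid.test.config.cloudPath=<PATH>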
*/
@Test(groups = TestNGGroup.AZURE_DEEP_STORAGE)
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITAzureParallelIndexTest extends AbstractITBatchIndexTest
{
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
private static final String INPUT_SOURCE_URIS_KEY = "uris";
private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
private static final String WIKIPEDIA_DATA_3 = "wikipedia_index_data3.json";
@DataProvider
public static Object[][] resources()
{
return new Object[][]{
{new Pair<>(INPUT_SOURCE_URIS_KEY,
ImmutableList.of(
"azure://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_1,
"azure://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_2,
"azure://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_3
)
)},
{new Pair<>(INPUT_SOURCE_PREFIXES_KEY,
ImmutableList.of(
"azure://%%BUCKET%%/%%PATH%%"
)
)},
{new Pair<>(INPUT_SOURCE_OBJECTS_KEY,
ImmutableList.of(
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_1),
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_2),
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_3)
)
)}
};
}
@Test(dataProvider = "resources")
public void testAzureIndexData(Pair<String, List> azureInputSource) throws Exception
{
try (
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> azurePropsTransform = spec -> {
try {
String inputSourceValue = jsonMapper.writeValueAsString(azureInputSource.rhs);
inputSourceValue = StringUtils.replace(
inputSourceValue,
"%%BUCKET%%",
config.getCloudBucket()
);
inputSourceValue = StringUtils.replace(
inputSourceValue,
"%%PATH%%",
config.getCloudPath()
);
spec = StringUtils.replace(
spec,
"%%PARTITIONS_SPEC%%",
jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null, null))
);
spec = StringUtils.replace(
spec,
"%%INPUT_SOURCE_TYPE%%",
"azure"
);
spec = StringUtils.replace(
spec,
"%%INPUT_SOURCE_PROPERTY_KEY%%",
azureInputSource.lhs
);
return StringUtils.replace(
spec,
"%%INPUT_SOURCE_PROPERTY_VALUE%%",
inputSourceValue
);
}
catch (Exception e) {
throw new RuntimeException(e);
}
};
doIndexTest(
INDEX_DATASOURCE,
INDEX_TASK,
azurePropsTransform,
INDEX_QUERIES_RESOURCE,
false,
true,
true
);
}
}
}


@ -0,0 +1,146 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.tests.indexer;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.testing.guice.DruidTestModuleFactory;
import org.apache.druid.tests.TestNGGroup;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.io.Closeable;
import java.util.List;
import java.util.UUID;
import java.util.function.Function;
/**
* IMPORTANT:
* To run this test, you must:
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your GCS bucket at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
* integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide.
* 4) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file
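* For example (a sketch; replace the placeholders with your own values):
* mvn verify -P integration-tests -Dgroups=gcs-deep-storage -Doverride.config.path=<PATH_TO_FILE> \
*   -Dresource.file.dir.path=<PATH_TO_FOLDER> -Ddruid.test.config.cloudBucket=<BUCKET> -Ddruid.test.config.cloudPath=<PATH>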
*/
@Test(groups = TestNGGroup.GCS_DEEP_STORAGE)
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITGcsParallelIndexTest extends AbstractITBatchIndexTest
{
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
private static final String INPUT_SOURCE_URIS_KEY = "uris";
private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
private static final String WIKIPEDIA_DATA_3 = "wikipedia_index_data3.json";
@DataProvider
public static Object[][] resources()
{
return new Object[][]{
{new Pair<>(INPUT_SOURCE_URIS_KEY,
ImmutableList.of(
"gs://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_1,
"gs://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_2,
"gs://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_3
)
)},
{new Pair<>(INPUT_SOURCE_PREFIXES_KEY,
ImmutableList.of(
"gs://%%BUCKET%%/%%PATH%%"
)
)},
{new Pair<>(INPUT_SOURCE_OBJECTS_KEY,
ImmutableList.of(
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_1),
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_2),
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_3)
)
)}
};
}
@Test(dataProvider = "resources")
public void testGcsIndexData(Pair<String, List> gcsInputSource) throws Exception
{
try (
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> gcsPropsTransform = spec -> {
try {
String inputSourceValue = jsonMapper.writeValueAsString(gcsInputSource.rhs);
inputSourceValue = StringUtils.replace(
inputSourceValue,
"%%BUCKET%%",
config.getCloudBucket()
);
inputSourceValue = StringUtils.replace(
inputSourceValue,
"%%PATH%%",
config.getCloudPath()
);
spec = StringUtils.replace(
spec,
"%%PARTITIONS_SPEC%%",
jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null, null))
);
spec = StringUtils.replace(
spec,
"%%INPUT_SOURCE_TYPE%%",
"google"
);
spec = StringUtils.replace(
spec,
"%%INPUT_SOURCE_PROPERTY_KEY%%",
gcsInputSource.lhs
);
return StringUtils.replace(
spec,
"%%INPUT_SOURCE_PROPERTY_VALUE%%",
inputSourceValue
);
}
catch (Exception e) {
throw new RuntimeException(e);
}
};
doIndexTest(
INDEX_DATASOURCE,
INDEX_TASK,
gcsPropsTransform,
INDEX_QUERIES_RESOURCE,
false,
true,
true
);
}
}
}


@ -0,0 +1,145 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.tests.indexer;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.testing.guice.DruidTestModuleFactory;
import org.apache.druid.tests.TestNGGroup;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.io.Closeable;
import java.util.List;
import java.util.UUID;
import java.util.function.Function;
/**
* IMPORTANT:
* To run this test, you must:
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your S3 bucket at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials/configs set. See
* integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
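* For example (a sketch; replace the placeholders with your own values):
* mvn verify -P integration-tests -Dgroups=s3-deep-storage -Doverride.config.path=<PATH_TO_FILE> \
*   -Ddruid.test.config.cloudBucket=<BUCKET> -Ddruid.test.config.cloudPath=<PATH>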
*/
@Test(groups = TestNGGroup.S3_DEEP_STORAGE)
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITS3ParallelIndexTest extends AbstractITBatchIndexTest
{
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
private static final String INPUT_SOURCE_URIS_KEY = "uris";
private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
private static final String WIKIPEDIA_DATA_3 = "wikipedia_index_data3.json";
@DataProvider
public static Object[][] resources()
{
return new Object[][]{
{new Pair<>(INPUT_SOURCE_URIS_KEY,
ImmutableList.of(
"s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_1,
"s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_2,
"s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_3
)
)},
{new Pair<>(INPUT_SOURCE_PREFIXES_KEY,
ImmutableList.of(
"s3://%%BUCKET%%/%%PATH%%"
)
)},
{new Pair<>(INPUT_SOURCE_OBJECTS_KEY,
ImmutableList.of(
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_1),
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_2),
ImmutableMap.of("bucket", "%%BUCKET%%", "path", "%%PATH%%" + WIKIPEDIA_DATA_3)
)
)}
};
}
@Test(dataProvider = "resources")
public void testS3IndexData(Pair<String, List> s3InputSource) throws Exception
{
try (
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> s3PropsTransform = spec -> {
try {
String inputSourceValue = jsonMapper.writeValueAsString(s3InputSource.rhs);
inputSourceValue = StringUtils.replace(
inputSourceValue,
"%%BUCKET%%",
config.getCloudBucket()
);
inputSourceValue = StringUtils.replace(
inputSourceValue,
"%%PATH%%",
config.getCloudPath()
);
spec = StringUtils.replace(
spec,
"%%PARTITIONS_SPEC%%",
jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null, null))
);
spec = StringUtils.replace(
spec,
"%%INPUT_SOURCE_TYPE%%",
"s3"
);
spec = StringUtils.replace(
spec,
"%%INPUT_SOURCE_PROPERTY_KEY%%",
s3InputSource.lhs
);
return StringUtils.replace(
spec,
"%%INPUT_SOURCE_PROPERTY_VALUE%%",
inputSourceValue
);
}
catch (Exception e) {
throw new RuntimeException(e);
}
};
doIndexTest(
INDEX_DATASOURCE,
INDEX_TASK,
s3PropsTransform,
INDEX_QUERIES_RESOURCE,
false,
true,
true
);
}
}
}


@ -0,0 +1,87 @@
{
"type": "index_parallel",
"spec": {
"dataSchema": {
"dataSource": "%%DATASOURCE%%",
"timestampSpec": {
"column": "timestamp"
},
"dimensionsSpec": {
"dimensions": [
"page",
{"type": "string", "name": "language", "createBitmapIndex": false},
"user",
"unpatrolled",
"newPage",
"robot",
"anonymous",
"namespace",
"continent",
"country",
"region",
"city"
]
},
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
},
{
"name": "thetaSketch",
"type": "thetaSketch",
"fieldName": "user"
},
{
"name": "quantilesDoublesSketch",
"type": "quantilesDoublesSketch",
"fieldName": "delta"
},
{
"name": "HLLSketchBuild",
"type": "HLLSketchBuild",
"fieldName": "user"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-02" ]
}
},
"ioConfig": {
"type": "index_parallel",
"inputSource": {
"type": "%%INPUT_SOURCE_TYPE%%",
"%%INPUT_SOURCE_PROPERTY_KEY%%": %%INPUT_SOURCE_PROPERTY_VALUE%%
},
"inputFormat": {
"type": "json"
}
},
"tuningConfig": {
"type": "index_parallel",
"maxNumConcurrentSubTasks": 10,
"partitionsSpec": %%PARTITIONS_SPEC%%,
"splitHintSpec": {
"type": "maxSize",
"maxSplitSize": 1
}
}
}
}