More Hadoop integration tests (#9714)

* More Hadoop integration tests

* Add missing s3 instructions

* Address PR comments

* Address PR comments

* PR comments

* Fix typo
Jonathan Wei 2020-04-30 14:33:01 -07:00 committed by GitHub
parent c61365c1e0
commit 61295bd002
42 changed files with 1501 additions and 102 deletions

View File

@ -373,7 +373,7 @@ jobs:
name: "(Compile=openjdk8, Run=openjdk8) other integration test" name: "(Compile=openjdk8, Run=openjdk8) other integration test"
jdk: openjdk8 jdk: openjdk8
services: *integration_test_services services: *integration_test_services
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format' JVM_RUNTIME='-Djvm.runtime=8' env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=8'
script: *run_integration_test script: *run_integration_test
after_failure: *integration_test_diags after_failure: *integration_test_diags
# END - Integration tests for Compile with Java 8 and Run with Java 8 # END - Integration tests for Compile with Java 8 and Run with Java 8
@ -407,7 +407,7 @@ jobs:
- <<: *integration_tests - <<: *integration_tests
name: "(Compile=openjdk8, Run=openjdk11) other integration test" name: "(Compile=openjdk8, Run=openjdk11) other integration test"
jdk: openjdk8 jdk: openjdk8
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format' JVM_RUNTIME='-Djvm.runtime=11' env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=11'
# END - Integration tests for Compile with Java 8 and Run with Java 11 # END - Integration tests for Compile with Java 8 and Run with Java 11
- name: "security vulnerabilities" - name: "security vulnerabilities"

View File

@ -214,8 +214,8 @@ of the integration test run discussed above. This is because druid
test clusters might not, in general, have access to hadoop.
This also applies to integration tests that use Hadoop HDFS as an inputSource or as deep storage.
To run integration tests that use Hadoop, you will have to run a Hadoop cluster. This can be done in two ways:
1) Run Druid Docker test clusters with a Hadoop container by passing -Dstart.hadoop.docker=true to the mvn command.
2) Run your own Druid + Hadoop cluster and specify the Hadoop configs in the configuration file (CONFIG_FILE).

Currently, hdfs-deep-storage and other <cloud>-deep-storage integration test groups can only be run with
Druid Docker test clusters by passing -Dstart.hadoop.docker=true to start the Hadoop container.
@ -224,21 +224,23 @@ See integration-tests/docker/environment-configs/override-examples/hdfs director
Note that if the integration test you are running also uses other cloud extensions (S3, Azure, GCS), additional
credentials/configs may need to be set in the same file as your Druid's Hadoop configs.

If you are running ITHadoopIndexTest with your own Druid + Hadoop cluster, please follow the below steps:
- Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
  located in integration-tests/src/test/resources/data/batch_index/json to your HDFS at /batch_index/json/
- Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/tsv to your HDFS
  at /batch_index/tsv/
If using the Docker-based Hadoop container, the steps above are automatically done by the integration tests.

When running the Hadoop tests, you must set `-Dextra.datasource.name.suffix=''`, due to https://github.com/apache/druid/issues/9788.

Run the test using mvn (using the bundled Docker-based Hadoop cluster):
```
mvn verify -P integration-tests -Dit.test=ITHadoopIndexTest -Dstart.hadoop.docker=true -Doverride.config.path=docker/environment-configs/override-examples/hdfs -Dextra.datasource.name.suffix=''
```

Run the test using mvn (using config file for existing Hadoop cluster):
```
mvn verify -P int-tests-config-file -Dit.test=ITHadoopIndexTest -Dextra.datasource.name.suffix=''
```
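The Hadoop + cloud deep storage test groups added in this change follow the same pattern. As a rough sketch, assuming the usual `-Dgroups=<test_group>` TestNG group selector and the bundled override example, the hdfs-deep-storage group can be run against the Docker-based Hadoop cluster with something like:
```
# Sketch only: group name from TestNGGroup; adjust the override file to the group you run
mvn verify -P integration-tests \
  -Dgroups=hdfs-deep-storage \
  -Dstart.hadoop.docker=true \
  -Doverride.config.path=docker/environment-configs/override-examples/hdfs \
  -Dextra.datasource.name.suffix=''
```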
In some test environments, the machine where the tests need to be executed

View File

@ -0,0 +1,31 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# Example of override config file to provide.
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
#
druid_storage_type=azure
druid_azure_account=<OVERRIDE_THIS>
druid_azure_key=<OVERRIDE_THIS>
druid_azure_container=<OVERRIDE_THIS>
druid_extensions_loadList=["druid-azure-extensions","druid-hdfs-storage"]
druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"]

View File

@ -0,0 +1,34 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# Example of override config file to provide.
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
#
druid_storage_type=hdfs
druid_storage_storageDirectory=/druid/segments
druid_extensions_loadList=["druid-azure-extensions","druid-hdfs-storage"]
# Not used since we have HDFS deep storage, but the Druid Azure extension requires these to be defined
druid_azure_account=<OVERRIDE_THIS>
druid_azure_key=<OVERRIDE_THIS>
druid_azure_container=<OVERRIDE_THIS>
druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"]

View File

@ -0,0 +1,30 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# Example of override config file to provide.
# Please replace <OVERRIDE_THIS> and <YOUR_GOOGLE_CREDENTIALS_FILE_NAME> with your cloud configs/credentials
#
druid_storage_type=google
druid_google_bucket=<OVERRIDE_THIS>
druid_google_prefix=<OVERRIDE_THIS>
druid_extensions_loadList=["druid-google-extensions","druid-hdfs-storage"]
GOOGLE_APPLICATION_CREDENTIALS=/shared/docker/credentials/<YOUR_GOOGLE_CREDENTIALS_FILE_NAME>

View File

@ -0,0 +1,30 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# Example of override config file to provide.
# Please replace <OVERRIDE_THIS> and <YOUR_GOOGLE_CREDENTIALS_FILE_NAME> with your cloud configs/credentials
#
druid_storage_type=hdfs
druid_storage_storageDirectory=/druid/segments
druid_extensions_loadList=["druid-google-extensions","druid-hdfs-storage"]
GOOGLE_APPLICATION_CREDENTIALS=/shared/docker/credentials/<YOUR_GOOGLE_CREDENTIALS_FILE_NAME>

View File

@ -0,0 +1,34 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# Example of override config file to provide.
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
#
druid_s3_accessKey=<OVERRIDE_THIS>
druid_s3_secretKey=<OVERRIDE_THIS>
druid_storage_type=hdfs
druid_storage_storageDirectory=/druid/segments
AWS_REGION=<OVERRIDE_THIS>
druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"]
druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"]

View File

@ -0,0 +1,35 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# Example of override config file to provide.
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
#
druid_s3_accessKey=<OVERRIDE_THIS>
druid_s3_secretKey=<OVERRIDE_THIS>
druid_storage_type=s3
druid_storage_bucket=<OVERRIDE_THIS>
druid_storage_baseKey=<OVERRIDE_THIS>
AWS_REGION=<OVERRIDE_THIS>
druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"]
druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"]

View File

@ -27,3 +27,5 @@ SERVICE_DRUID_JAVA_OPTS=-server -Xmx128m -XX:+UseG1GC -agentlib:jdwp=transport=d
druid_auth_basic_common_cacheDirectory=/tmp/authCache/router
druid_sql_avatica_enable=true
druid_server_https_crlPath=/tls/revocations.crl
druid_router_managementProxy_enabled=true

View File

@ -32,5 +32,3 @@ cd client_tls
../docker/tls/generate-to-be-revoked-client-cert.sh
../docker/tls/generate-untrusted-root-client-cert.sh
../docker/tls/generate-valid-intermediate-client-cert.sh

View File

@ -118,7 +118,7 @@ rm -rf certs.seq
echo 11111115 > certs.seq
# Generate a client certificate for this machine
openssl genrsa -out expired_client.key 1024
openssl req -new -out expired_client.csr -key expired_client.key -reqexts req_ext -config expired_csr.conf
openssl ca -batch -config root_for_expired_client.cnf -policy policy_loose -out expired_client.pem -outdir . -startdate 101010000000Z -enddate 101011000000Z -extensions v3_ca -cert root.pem -keyfile root.key -infiles expired_client.csr

View File

@ -50,7 +50,7 @@ DNS.2 = localhost
EOT
# Generate a client certificate for this machine
openssl genrsa -out client.key 1024
openssl req -new -out client.csr -key client.key -reqexts req_ext -config csr.conf
openssl x509 -req -days 3650 -in client.csr -CA root.pem -CAkey root.key -set_serial 0x11111111 -out client.pem -sha256 -extfile csr.conf -extensions req_ext

View File

@ -46,7 +46,7 @@ DNS.1 = thisisprobablywrongtoo
EOT
openssl genrsa -out invalid_hostname_client.key 1024
openssl req -new -out invalid_hostname_client.csr -key invalid_hostname_client.key -reqexts req_ext -config invalid_hostname_csr.conf
openssl x509 -req -days 3650 -in invalid_hostname_client.csr -CA root.pem -CAkey root.key -set_serial 0x11111112 -out invalid_hostname_client.pem -sha256 -extfile invalid_hostname_csr.conf -extensions req_ext

View File

@ -45,7 +45,7 @@ IP.1 = 9.9.9.9
EOT
# Generate a bad intermediate certificate
openssl genrsa -out invalid_ca_intermediate.key 1024
openssl req -new -out invalid_ca_intermediate.csr -key invalid_ca_intermediate.key -reqexts req_ext -config invalid_ca_intermediate.conf
openssl x509 -req -days 3650 -in invalid_ca_intermediate.csr -CA root.pem -CAkey root.key -set_serial 0x33333331 -out invalid_ca_intermediate.pem -sha256 -extfile invalid_ca_intermediate.conf -extensions req_ext
@ -81,7 +81,7 @@ DNS.2 = localhost
EOT
# Generate a client certificate for this machine
openssl genrsa -out invalid_ca_client.key 1024
openssl req -new -out invalid_ca_client.csr -key invalid_ca_client.key -reqexts req_ext -config invalid_ca_client.conf
openssl x509 -req -days 3650 -in invalid_ca_client.csr -CA invalid_ca_intermediate.pem -CAkey invalid_ca_intermediate.key -set_serial 0x33333333 -out invalid_ca_client.pem -sha256 -extfile invalid_ca_client.conf -extensions req_ext

View File

@ -52,9 +52,9 @@ DNS.2 = localhost
EOT
# Generate a client certificate for this machine
openssl genrsa -out revoked_client.key 1024
openssl req -new -out revoked_client.csr -key revoked_client.key -reqexts req_ext -config revoked_csr.conf
openssl x509 -req -days 3650 -in revoked_client.csr -CA root.pem -CAkey root.key -set_serial 0x11111113 -out revoked_client.pem -sha256 -extfile revoked_csr.conf -extensions req_ext
# Create a Java keystore containing the generated certificate
openssl pkcs12 -export -in revoked_client.pem -inkey revoked_client.key -out revoked_client.p12 -name revoked_druid -CAfile root.pem -caname druid-it-root -password pass:druid123

View File

@ -50,7 +50,7 @@ DNS.2 = localhost
EOT
# Generate a client certificate for this machine
openssl genrsa -out client_another_root.key 1024
openssl req -new -out client_another_root.csr -key client_another_root.key -reqexts req_ext -config csr_another_root.conf
openssl x509 -req -days 3650 -in client_another_root.csr -CA untrusted_root.pem -CAkey untrusted_root.key -set_serial 0x11111114 -out client_another_root.pem -sha256 -extfile csr_another_root.conf -extensions req_ext

View File

@ -45,7 +45,7 @@ IP.1 = 9.9.9.9
EOT
# Generate an intermediate certificate
openssl genrsa -out ca_intermediate.key 1024
openssl req -new -out ca_intermediate.csr -key ca_intermediate.key -reqexts req_ext -config ca_intermediate.conf
openssl x509 -req -days 3650 -in ca_intermediate.csr -CA root.pem -CAkey root.key -set_serial 0x33333332 -out ca_intermediate.pem -sha256 -extfile ca_intermediate.conf -extensions req_ext
@ -81,7 +81,7 @@ DNS.2 = localhost
EOT
# Generate a client certificate for this machine
openssl genrsa -out intermediate_ca_client.key 1024
openssl req -new -out intermediate_ca_client.csr -key intermediate_ca_client.key -reqexts req_ext -config intermediate_ca_client.conf
openssl x509 -req -days 3650 -in intermediate_ca_client.csr -CA ca_intermediate.pem -CAkey ca_intermediate.key -set_serial 0x33333333 -out intermediate_ca_client.pem -sha256 -extfile intermediate_ca_client.conf -extensions req_ext

View File

@ -365,6 +365,7 @@
<skip.stop.docker>false</skip.stop.docker>
<override.config.path></override.config.path>
<resource.file.dir.path></resource.file.dir.path>
<extra.datasource.name.suffix>\ Россия\ 한국\ 中国!?</extra.datasource.name.suffix>
</properties>
<build>
<plugins>
@ -430,7 +431,7 @@
-Dfile.encoding=UTF-8
-Ddruid.test.config.dockerIp=${env.DOCKER_IP}
-Ddruid.test.config.hadoopDir=${env.HADOOP_DIR}
-Ddruid.test.config.extraDatasourceNameSuffix=${extra.datasource.name.suffix}
-Ddruid.zk.service.host=${env.DOCKER_IP}
-Ddruid.client.https.trustStorePath=client_tls/truststore.jks
-Ddruid.client.https.trustStorePassword=druid123

View File

@ -80,17 +80,18 @@ if [ -n "$DRUID_INTEGRATION_TEST_SKIP_START_DOCKER" ] && [ "$DRUID_INTEGRATION_T
# For druid-kinesis-indexing-service
mkdir -p $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
mv $SHARED_DIR/docker/lib/druid-kinesis-indexing-service-* $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
# For druid-parquet-extensions
mkdir -p $SHARED_DIR/docker/extensions/druid-parquet-extensions
mv $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions
# For druid-orc-extensions
mkdir -p $SHARED_DIR/docker/extensions/druid-orc-extensions
mv $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions
# Pull Hadoop dependency if needed
if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]
then
java -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client:2.8.5 -h org.apache.hadoop:hadoop-aws:2.8.5 -h org.apache.hadoop:hadoop-azure:2.8.5
curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-latest.jar --output $SHARED_DIR/docker/lib/gcs-connector-hadoop2-latest.jar
fi
# install logging config

View File

@ -57,6 +57,9 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
private String password;
private String cloudBucket;
private String cloudPath;
private String cloudRegion;
private String hadoopGcsCredentialsPath;
private String azureKey;
private String streamEndpoint;
@JsonCreator
@ -193,6 +196,9 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
cloudBucket = props.get("cloud_bucket");
cloudPath = props.get("cloud_path");
cloudRegion = props.get("cloud_region");
hadoopGcsCredentialsPath = props.get("hadoopGcsCredentialsPath");
azureKey = props.get("azureKey");
streamEndpoint = props.get("stream_endpoint");
LOG.info("router: [%s], [%s]", routerUrl, routerTLSUrl);
@ -356,6 +362,24 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
return cloudPath;
}
@Override
public String getCloudRegion()
{
return cloudRegion;
}
@Override
public String getAzureKey()
{
return azureKey;
}
@Override
public String getHadoopGcsCredentialsPath()
{
return hadoopGcsCredentialsPath;
}
@Override
public String getStreamEndpoint()
{

View File

@ -33,10 +33,6 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
@NotNull
private String dockerIp;
@JsonProperty
@NotNull
private String hadoopDir;
@JsonProperty
private String extraDatasourceNameSuffix = "";
@ -46,6 +42,15 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
@JsonProperty
private String cloudBucket;
@JsonProperty
private String cloudRegion;
@JsonProperty
private String hadoopGcsCredentialsPath;
@JsonProperty
private String azureKey;
@JsonProperty
private String streamEndpoint;
@ -185,9 +190,6 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
@Override
public String getProperty(String prop)
{
if ("hadoopTestDir".equals(prop)) {
return hadoopDir;
}
throw new UnsupportedOperationException("DockerConfigProvider does not support property " + prop);
}
@ -233,6 +235,24 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
return cloudPath;
}
@Override
public String getCloudRegion()
{
return cloudRegion;
}
@Override
public String getAzureKey()
{
return azureKey;
}
@Override
public String getHadoopGcsCredentialsPath()
{
return hadoopGcsCredentialsPath;
}
@Override
public String getStreamEndpoint()
{

View File

@ -89,5 +89,11 @@ public interface IntegrationTestingConfig
String getCloudPath();
String getCloudRegion();
String getAzureKey();
String getHadoopGcsCredentialsPath();
String getStreamEndpoint();
}

View File

@ -27,8 +27,6 @@ public class TestNGGroup
{
public static final String BATCH_INDEX = "batch-index";
public static final String HADOOP_INDEX = "hadoop-index";
public static final String KAFKA_INDEX = "kafka-index";
public static final String KAFKA_INDEX_SLOW = "kafka-index-slow";
@ -86,6 +84,15 @@ public class TestNGGroup
*/
public static final String HDFS_DEEP_STORAGE = "hdfs-deep-storage";
public static final String HADOOP_S3_TO_S3 = "hadoop-s3-to-s3-deep-storage";
public static final String HADOOP_S3_TO_HDFS = "hadoop-s3-to-hdfs-deep-storage";
public static final String HADOOP_AZURE_TO_AZURE = "hadoop-azure-to-azure-deep-storage";
public static final String HADOOP_AZURE_TO_HDFS = "hadoop-azure-to-hdfs-deep-storage";
public static final String HADOOP_GCS_TO_GCS = "hadoop-gcs-to-gcs-deep-storage";
public static final String HADOOP_GCS_TO_HDFS = "hadoop-gcs-to-hdfs-deep-storage";
/**
* This group is not part of CI. To run this group, s3 configs/credentials for your s3 must be provided in a file.
* The path of the file must then be passed to mvn with -Doverride.config.path=<PATH_TO_FILE>

View File

@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.tests.hadoop;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
import java.io.Closeable;
import java.util.UUID;
import java.util.function.Function;
public abstract class AbstractAzureInputHadoopIndexTest extends AbstractITBatchIndexTest
{
private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_azure_input_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
void doTest() throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> azurePropsTransform = spec -> {
try {
String path = StringUtils.format(
"wasbs://%s@%s.blob.core.windows.net/",
config.getCloudPath(),
config.getCloudBucket()
);
spec = StringUtils.replace(
spec,
"%%INPUT_PATHS%%",
path
);
spec = StringUtils.replace(
spec,
"%%AZURE_ACCOUNT%%",
config.getCloudBucket()
);
spec = StringUtils.replace(
spec,
"%%AZURE_KEY%%",
config.getAzureKey()
);
return spec;
}
catch (Exception e) {
throw new RuntimeException(e);
}
};
doIndexTest(
indexDatasource,
INDEX_TASK,
azurePropsTransform,
INDEX_QUERIES_RESOURCE,
false,
true,
true
);
}
}
}

View File

@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.tests.hadoop;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
import java.io.Closeable;
import java.util.UUID;
import java.util.function.Function;
public abstract class AbstractGcsInputHadoopIndexTest extends AbstractITBatchIndexTest
{
private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_gcs_input_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
void doTest() throws Exception
{
final String indexDatasource = "wikipedia_hadoop_index_test_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> gcsPropsTransform = spec -> {
try {
String path = StringUtils.format(
"gs://%s/%s",
config.getCloudBucket(),
config.getCloudPath()
);
spec = StringUtils.replace(
spec,
"%%INPUT_PATHS%%",
path
);
spec = StringUtils.replace(
spec,
"%%GCS_KEYFILE_PATH%%",
config.getHadoopGcsCredentialsPath()
);
return spec;
}
catch (Exception e) {
throw new RuntimeException(e);
}
};
doIndexTest(
indexDatasource,
INDEX_TASK,
gcsPropsTransform,
INDEX_QUERIES_RESOURCE,
false,
true,
true
);
}
}
}

View File

@ -0,0 +1,96 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.tests.hadoop;
import com.google.inject.Inject;
import org.apache.druid.common.aws.AWSCredentialsConfig;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
import java.io.Closeable;
import java.util.UUID;
import java.util.function.Function;
public abstract class AbstractS3InputHadoopIndexTest extends AbstractITBatchIndexTest
{
@Inject
protected AWSCredentialsConfig awsCredentialsConfig;
private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_s3_input_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
void doTest() throws Exception
{
final String indexDatasource = "wikipedia_hadoop_index_test_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> s3PropsTransform = spec -> {
try {
String path = StringUtils.format(
"s3a://%s/%s",
config.getCloudBucket(),
config.getCloudPath()
);
spec = StringUtils.replace(
spec,
"%%INPUT_PATHS%%",
path
);
spec = StringUtils.replace(
spec,
"%%AWS_ACCESS_KEY%%",
awsCredentialsConfig.getAccessKey().getPassword()
);
spec = StringUtils.replace(
spec,
"%%AWS_SECRET_KEY%%",
awsCredentialsConfig.getSecretKey().getPassword()
);
spec = StringUtils.replace(
spec,
"%%AWS_REGION%%",
config.getCloudRegion()
);
return spec;
}
catch (Exception e) {
throw new RuntimeException(e);
}
};
doIndexTest(
indexDatasource,
INDEX_TASK,
s3PropsTransform,
INDEX_QUERIES_RESOURCE,
false,
true,
true
);
}
}
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.tests.hadoop;
import org.apache.druid.testing.guice.DruidTestModuleFactory;
import org.apache.druid.tests.TestNGGroup;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
/**
* IMPORTANT:
* To run this test, you must:
* 1) Set the account, container, and key for your data.
* This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath,
* and -Ddruid.test.config.azureKey.
* - `cloudBucket` should be set to your Azure account name
* - `cloudPath` should be set to the Azure container where the input data resides
* - `azureKey` should be set to an Azure access key that can read from the container above
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index/json to the Azure container set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure+Hadoop configs set. See
* integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure for env vars to provide.
* 4) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
*/
@Test(groups = TestNGGroup.HADOOP_AZURE_TO_AZURE)
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITAzureInputToAzureHadoopIndexTest extends AbstractAzureInputHadoopIndexTest
{
public void testAzureIndexData() throws Exception
{
doTest();
}
}
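Putting the javadoc steps above together, a rough sketch of the full invocation for this test (group name from TestNGGroup; the angle-bracket values and the assumption that the properties can be passed straight through the mvn command line are yours to verify):
```
# Sketch only: all <...> values are placeholders for your Azure account/container/key
mvn verify -P integration-tests \
  -Dgroups=hadoop-azure-to-azure-deep-storage \
  -Dstart.hadoop.docker=true \
  -Doverride.config.path=docker/environment-configs/override-examples/hadoop/azure_to_azure \
  -Ddruid.test.config.cloudBucket=<AZURE_ACCOUNT> \
  -Ddruid.test.config.cloudPath=<AZURE_CONTAINER> \
  -Ddruid.test.config.azureKey=<AZURE_KEY> \
  -Dextra.datasource.name.suffix=''
```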

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.tests.hadoop;
import org.apache.druid.testing.guice.DruidTestModuleFactory;
import org.apache.druid.tests.TestNGGroup;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
/**
* IMPORTANT:
* To run this test, you must:
* 1) Set the account, container, and key for your data.
* This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath,
* and -Ddruid.test.config.azureKey.
* - `cloudBucket` should be set to your Azure account name
* - `cloudPath` should be set to the Azure container where the input data resides
* - `azureKey` should be set to an Azure access key that can read from the container above
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index/json to the Azure container set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure+Hadoop configs set. See
* integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs for env vars to provide.
* 4) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
*/
@Test(groups = TestNGGroup.HADOOP_AZURE_TO_HDFS)
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITAzureInputToHdfsHadoopIndexTest extends AbstractAzureInputHadoopIndexTest
{
public void testAzureIndexData() throws Exception
{
doTest();
}
}

View File

@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.tests.hadoop;
import org.apache.druid.testing.guice.DruidTestModuleFactory;
import org.apache.druid.tests.TestNGGroup;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
/**
* IMPORTANT:
* To run this test, you must:
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Set -Ddruid.test.config.hadoopGcsCredentialsPath to the location of your Google credentials file as it
* exists within the Hadoop cluster that will ingest the data. The credentials file can be placed in the
* shared folder used by the integration test containers if running the Docker-based Hadoop container,
* in which case this property can be set to /shared/<path_of_your_credentials_file>
* 3) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> pointing to a folder that contains the GOOGLE_APPLICATION_CREDENTIALS file
* 4) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
* 5) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
* integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_gcs for env vars to provide.
* 6) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
*/
@Test(groups = TestNGGroup.HADOOP_GCS_TO_GCS)
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITGcsInputToGcsHadoopIndexTest extends AbstractGcsInputHadoopIndexTest
{
public void testGcsIndexData() throws Exception
{
doTest();
}
}
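For reference, a hedged sketch of the corresponding command for this test (property names come from the javadoc above; the bucket, prefix, folder, and credentials file name are placeholders, and the /shared/docker/credentials location mirrors the override example rather than being guaranteed here):
```
# Sketch only: all <...> values are placeholders for your GCS bucket/prefix and credentials
mvn verify -P integration-tests \
  -Dgroups=hadoop-gcs-to-gcs-deep-storage \
  -Dstart.hadoop.docker=true \
  -Doverride.config.path=docker/environment-configs/override-examples/hadoop/gcs_to_gcs \
  -Dresource.file.dir.path=<PATH_TO_FOLDER> \
  -Ddruid.test.config.cloudBucket=<GCS_BUCKET> \
  -Ddruid.test.config.cloudPath=<GCS_PREFIX> \
  -Ddruid.test.config.hadoopGcsCredentialsPath=/shared/docker/credentials/<YOUR_GOOGLE_CREDENTIALS_FILE_NAME> \
  -Dextra.datasource.name.suffix=''
```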

View File

@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.tests.hadoop;
import org.apache.druid.testing.guice.DruidTestModuleFactory;
import org.apache.druid.tests.TestNGGroup;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
/**
* IMPORTANT:
* To run this test, you must:
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Set -Ddruid.test.config.hadoopGcsCredentialsPath to the location of your Google credentials file as it
* exists within the Hadoop cluster that will ingest the data. The credentials file can be placed in the
* shared folder used by the integration test containers if running the Docker-based Hadoop container,
* in which case this property can be set to /shared/<path_of_your_credentials_file>
* 3) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> pointing to a folder that contains the GOOGLE_APPLICATION_CREDENTIALS file
* 4) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
* 5) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
* integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_hdfs for env vars to provide.
* 6) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
*/
@Test(groups = TestNGGroup.HADOOP_GCS_TO_HDFS)
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITGcsInputToHdfsHadoopIndexTest extends AbstractGcsInputHadoopIndexTest
{
public void testGcsIndexData() throws Exception
{
doTest();
}
}

View File

@ -19,87 +19,150 @@
package org.apache.druid.tests.hadoop;
import com.google.common.collect.ImmutableList;
import org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec;
import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.testing.guice.DruidTestModuleFactory;
import org.apache.druid.tests.TestNGGroup;
import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.io.Closeable;
import java.util.UUID;
import java.util.function.Function;
/**
* IMPORTANT:
* To run this test, you must:
* 1) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index/json to your HDFS at /batch_index/json/
* If using the Docker-based Hadoop container, this is automatically done by the integration tests.
* 2) Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/tsv to your HDFS
* at /batch_index/tsv/
* If using the Docker-based Hadoop container, this is automatically done by the integration tests.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with HDFS configs set. See
* integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
* 4) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
*/
@Test(groups = TestNGGroup.HDFS_DEEP_STORAGE)
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITHadoopIndexTest extends AbstractITBatchIndexTest
{
private static final Logger LOG = new Logger(ITHadoopIndexTest.class);
private static final String BATCH_TASK = "/hadoop/batch_hadoop_indexer.json";
private static final String BATCH_QUERIES_RESOURCE = "/hadoop/batch_hadoop_queries.json";
private static final String BATCH_DATASOURCE = "batchHadoop";
private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_hadoop_index_test";
private static final String REINDEX_TASK = "/hadoop/wikipedia_hadoop_reindex_task.json";
private static final String REINDEX_QUERIES_RESOURCE = "/indexer/wikipedia_reindex_queries.json";
private static final String REINDEX_DATASOURCE = "wikipedia_hadoop_reindex_test";
@DataProvider
public static Object[][] resources()
{
return new Object[][]{
{new HashedPartitionsSpec(3, null, null)},
{new HashedPartitionsSpec(null, 3, ImmutableList.of("page"))},
{new HashedPartitionsSpec(null, 3, ImmutableList.of("page", "user"))},
{new SingleDimensionPartitionsSpec(1000, null, null, false)},
{new SingleDimensionPartitionsSpec(1000, null, "page", false)},
{new SingleDimensionPartitionsSpec(1000, null, null, true)},
//{new HashedPartitionsSpec(null, 3, null)} // this results in a bug where the segments have 0 rows
};
}
@Test
public void testLegacyITHadoopIndexTest() throws Exception
{
try (
final Closeable ignored0 = unloader(BATCH_DATASOURCE + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> specPathsTransform = spec -> {
try {
String path = "/batch_index/tsv";
spec = StringUtils.replace(
spec,
"%%INPUT_PATHS%%",
path
);
return spec;
}
catch (Exception e) {
throw new RuntimeException(e);
}
};
doIndexTest(
BATCH_DATASOURCE,
BATCH_TASK,
specPathsTransform,
BATCH_QUERIES_RESOURCE,
false,
true,
true
);
}
}
@Test(dataProvider = "resources")
public void testIndexData(DimensionBasedPartitionsSpec partitionsSpec) throws Exception
{
String indexDatasource = INDEX_DATASOURCE + "_" + UUID.randomUUID();
String reindexDatasource = REINDEX_DATASOURCE + "_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> specPathsTransform = spec -> {
try {
String path = "/batch_index/json";
spec = StringUtils.replace(
spec,
"%%INPUT_PATHS%%",
path
);
spec = StringUtils.replace(
spec,
"%%PARTITIONS_SPEC%%",
jsonMapper.writeValueAsString(partitionsSpec)
);
return spec;
}
catch (Exception e) {
throw new RuntimeException(e);
}
};
doIndexTest(
indexDatasource,
INDEX_TASK,
specPathsTransform,
INDEX_QUERIES_RESOURCE,
false,
true,
true
);
doReindexTest(
indexDatasource,
reindexDatasource,
REINDEX_TASK,
REINDEX_QUERIES_RESOURCE
);
}
}
}

View File

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.tests.hadoop;
import org.apache.druid.testing.guice.DruidTestModuleFactory;
import org.apache.druid.tests.TestNGGroup;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
/**
* IMPORTANT:
* To run this test, you must:
* 1) Set the bucket, path, and region for your data.
* This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath
* and -Ddruid.test.config.cloudRegion, or by setting "cloud_bucket", "cloud_path", and "cloud_region" in the config file.
* 2) Set -Ddruid.s3.accessKey and -Ddruid.s3.secretKey to your S3 access/secret keys when running the tests.
* 3) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
* 4) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials and hdfs deep storage configs set. See
* integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs for env vars to provide.
* 5) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
*/
@Test(groups = TestNGGroup.HADOOP_S3_TO_HDFS)
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITS3InputToHdfsHadoopIndexTest extends AbstractS3InputHadoopIndexTest
{
@Test()
public void testS3IndexData() throws Exception
{
doTest();
}
}
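For readers following the steps above, a complete invocation might look roughly like the following. This example is not part of the committed sources: it assumes the standard integration-tests Maven profile, that hadoop-s3-to-hdfs-deep-storage is the group string behind TestNGGroup.HADOOP_S3_TO_HDFS, and placeholder values for the bucket, path, region, keys, and override file:

    mvn verify -P integration-tests \
      -Dgroups=hadoop-s3-to-hdfs-deep-storage \
      -Ddruid.test.config.cloudBucket=<your-bucket> \
      -Ddruid.test.config.cloudPath=<your-path> \
      -Ddruid.test.config.cloudRegion=<your-region> \
      -Ddruid.s3.accessKey=<access-key> \
      -Ddruid.s3.secretKey=<secret-key> \
      -Doverride.config.path=<PATH_TO_FILE> \
      -Dstart.hadoop.docker=true \
      -Dextra.datasource.name.suffix=''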
@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.tests.hadoop;
import org.apache.druid.testing.guice.DruidTestModuleFactory;
import org.apache.druid.tests.TestNGGroup;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
/**
* IMPORTANT:
* To run this test, you must:
* 1) Set the bucket, path, and region for your data.
* This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath
* and -Ddruid.test.config.cloudRegion, or by setting "cloud_bucket", "cloud_path", and "cloud_region" in the config file.
* 2) Set -Ddruid.s3.accessKey and -Ddruid.s3.secretKey to your S3 access/secret keys when running the tests.
* 3) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
* 4) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials and s3 deep storage configs set. See
* integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 for env vars to provide.
* 5) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
*/
@Test(groups = TestNGGroup.HADOOP_S3_TO_S3)
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITS3InputToS3HadoopIndexTest extends AbstractS3InputHadoopIndexTest
{
@Test()
public void testS3IndexData() throws Exception
{
doTest();
}
}
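The same sketch applies to this test, swapping in the S3-to-S3 group and the s3_to_s3 override example (again an illustration, assuming hadoop-s3-to-s3-deep-storage is the group string behind TestNGGroup.HADOOP_S3_TO_S3; the -Ddruid.test.config.* and -Ddruid.s3.* properties are the same as in the previous example):

    mvn verify -P integration-tests \
      -Dgroups=hadoop-s3-to-s3-deep-storage \
      -Doverride.config.path=<PATH_TO_FILE> \
      -Dstart.hadoop.docker=true \
      -Dextra.datasource.name.suffix=''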
@ -85,14 +85,14 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
   private static final Logger LOG = new Logger(AbstractITBatchIndexTest.class);

   @Inject
-  IntegrationTestingConfig config;
+  protected IntegrationTestingConfig config;
   @Inject
   protected SqlTestQueryHelper sqlQueryHelper;

   @Inject
   ClientInfoResourceTestClient clientInfoResourceTestClient;

-  void doIndexTest(
+  protected void doIndexTest(
       String dataSource,
       String indexTaskFilePath,
       String queryFilePath,
@ -104,7 +104,7 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
     doIndexTest(dataSource, indexTaskFilePath, Function.identity(), queryFilePath, waitForNewVersion, runTestQueries, waitForSegmentsToLoad);
   }

-  void doIndexTest(
+  protected void doIndexTest(
       String dataSource,
       String indexTaskFilePath,
       Function<String, String> taskSpecTransform,
@ -151,7 +151,7 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
     }
   }

-  void doReindexTest(
+  protected void doReindexTest(
       String baseDataSource,
       String reindexDataSource,
       String reindexTaskFilePath,
@ -2,9 +2,9 @@
   "type": "index_hadoop",
   "spec": {
     "dataSchema": {
-      "dataSource": "batchHadoop",
+      "dataSource": "%%DATASOURCE%%",
       "parser": {
-        "type": "string",
+        "type": "hadoopyString",
         "parseSpec": {
           "format": "tsv",
           "timestampSpec": {
@ -53,7 +53,7 @@
       "type": "hadoop",
       "inputSpec": {
         "type": "static",
-        "paths": "%%HADOOP_TEST_PATH%%"
+        "paths": "/batch_index/tsv/batch_hadoop.data"
       }
     },
     "tuningConfig": {
@ -64,7 +64,19 @@
         "type": "hashed"
       },
       "jobProperties": {
-        "fs.permissions.umask-mode": "022"
+        "fs.permissions.umask-mode": "022",
+        "fs.default.name" : "hdfs://druid-it-hadoop:9000",
+        "fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
+        "dfs.datanode.address" : "druid-it-hadoop",
+        "dfs.client.use.datanode.hostname" : "true",
+        "dfs.datanode.use.datanode.hostname" : "true",
+        "yarn.resourcemanager.hostname" : "druid-it-hadoop",
+        "yarn.nodemanager.vmem-check-enabled" : "false",
+        "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
+        "mapreduce.job.user.classpath.first" : "true",
+        "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
+        "mapreduce.map.memory.mb" : 1024,
+        "mapreduce.reduce.memory.mb" : 1024
       },
       "rowFlushBoundary": 10000
     }
@ -0,0 +1,107 @@
{
"type": "index_hadoop",
"hadoopDependencyCoordinates" : ["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"],
"spec": {
"dataSchema": {
"dataSource": "%%DATASOURCE%%",
"parser": {
"type": "hadoopyString",
"parseSpec": {
"format" : "json",
"timestampSpec": {
"column": "timestamp"
},
"dimensionsSpec": {
"dimensions": [
"page",
{"type": "string", "name": "language", "createBitmapIndex": false},
"user",
"unpatrolled",
"newPage",
"robot",
"anonymous",
"namespace",
"continent",
"country",
"region",
"city"
]
}
}
},
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
},
{
"name": "thetaSketch",
"type": "thetaSketch",
"fieldName": "user"
},
{
"name": "quantilesDoublesSketch",
"type": "quantilesDoublesSketch",
"fieldName": "delta"
},
{
"name": "HLLSketchBuild",
"type": "HLLSketchBuild",
"fieldName": "user"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-02" ]
}
},
"ioConfig": {
"type": "hadoop",
"inputSpec": {
"type": "static",
"paths": "%%INPUT_PATHS%%"
}
},
"tuningConfig": {
"type": "hadoop",
"partitionsSpec": {
"assumeGrouped": true,
"targetPartitionSize": 75000,
"type": "hashed"
},
"jobProperties": {
"fs.permissions.umask-mode": "022",
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
"dfs.datanode.address" : "druid-it-hadoop",
"dfs.client.use.datanode.hostname" : "true",
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -D",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.map.memory.mb" : 1024,
"mapreduce.reduce.memory.mb" : 1024,
"fs.azure.account.key.%%AZURE_ACCOUNT%%.blob.core.windows.net":"%%AZURE_KEY%%"
},
"rowFlushBoundary": 10000
}
}
}
@ -0,0 +1,113 @@
{
"type": "index_hadoop",
"spec": {
"dataSchema": {
"dataSource": "%%DATASOURCE%%",
"parser": {
"type": "hadoopyString",
"parseSpec": {
"format" : "json",
"timestampSpec": {
"column": "timestamp"
},
"dimensionsSpec": {
"dimensions": [
"page",
{"type": "string", "name": "language", "createBitmapIndex": false},
"user",
"unpatrolled",
"newPage",
"robot",
"anonymous",
"namespace",
"continent",
"country",
"region",
"city"
]
}
}
},
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
},
{
"name": "thetaSketch",
"type": "thetaSketch",
"fieldName": "user"
},
{
"name": "quantilesDoublesSketch",
"type": "quantilesDoublesSketch",
"fieldName": "delta"
},
{
"name": "HLLSketchBuild",
"type": "HLLSketchBuild",
"fieldName": "user"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-02" ]
}
},
"ioConfig": {
"type": "hadoop",
"inputSpec": {
"type": "static",
"paths": "%%INPUT_PATHS%%"
}
},
"tuningConfig": {
"type": "hadoop",
"partitionsSpec": {
"assumeGrouped": true,
"targetPartitionSize": 75000,
"type": "hashed"
},
"jobProperties": {
"fs.permissions.umask-mode": "022",
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
"dfs.datanode.address" : "druid-it-hadoop",
"dfs.client.use.datanode.hostname" : "true",
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -D",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.map.memory.mb" : 1024,
"mapreduce.reduce.memory.mb" : 1024,
"fs.gs.auth.service.account.json.keyfile":"%%GCS_KEYFILE_PATH%%",
"fs.gs.working.dir":"/",
"fs.gs.path.encoding":"uri-path",
"fs.gs.impl": "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem",
"fs.AbstractFileSystem.gs.impl": "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS",
"fs.gs.auth.service.account.enable": "true",
"mapred.child.env":"GOOGLE_APPLICATION_CREDENTIALS=%%GCS_KEYFILE_PATH%%",
"fs.gs.reported.permissions":"777"
},
"rowFlushBoundary": 10000
}
}
}
@ -0,0 +1,101 @@
{
"type": "index_hadoop",
"spec": {
"dataSchema": {
"dataSource": "%%DATASOURCE%%",
"parser": {
"type": "hadoopyString",
"parseSpec": {
"format" : "json",
"timestampSpec": {
"column": "timestamp"
},
"dimensionsSpec": {
"dimensions": [
"page",
{"type": "string", "name": "language", "createBitmapIndex": false},
"user",
"unpatrolled",
"newPage",
"robot",
"anonymous",
"namespace",
"continent",
"country",
"region",
"city"
]
}
}
},
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
},
{
"name": "thetaSketch",
"type": "thetaSketch",
"fieldName": "user"
},
{
"name": "quantilesDoublesSketch",
"type": "quantilesDoublesSketch",
"fieldName": "delta"
},
{
"name": "HLLSketchBuild",
"type": "HLLSketchBuild",
"fieldName": "user"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-02" ]
}
},
"ioConfig": {
"type": "hadoop",
"inputSpec": {
"type": "static",
"paths": "%%INPUT_PATHS%%"
}
},
"tuningConfig": {
"type": "hadoop",
"partitionsSpec": %%PARTITIONS_SPEC%%,
"jobProperties": {
"fs.permissions.umask-mode": "022",
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
"dfs.datanode.address" : "druid-it-hadoop",
"dfs.client.use.datanode.hostname" : "true",
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.map.memory.mb" : 1024,
"mapreduce.reduce.memory.mb" : 1024
},
"rowFlushBoundary": 10000
}
}
}
@ -0,0 +1,77 @@
{
"type": "index_hadoop",
"spec": {
"dataSchema": {
"dataSource": "%%REINDEX_DATASOURCE%%",
"parser": {
"type": "hadoopyString",
"parseSpec": {
"format" : "json",
"timestampSpec": {
"column": "timestamp",
"format": "iso"
},
"dimensionsSpec": {
"dimensionExclusions" : ["robot", "continent"]
}
}
},
"metricsSpec": [
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-01" ]
}
},
"ioConfig": {
"type": "hadoop",
"inputSpec": {
"type": "dataSource",
"ingestionSpec": {
"dataSource": "%%DATASOURCE%%",
"intervals": ["2013-08-31/2013-09-01"]
}
}
},
"tuningConfig": {
"type": "hadoop",
"partitionsSpec": {
"assumeGrouped": true,
"targetPartitionSize": 75000,
"type": "hashed"
},
"jobProperties": {
"fs.permissions.umask-mode": "022",
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
"dfs.datanode.address" : "druid-it-hadoop",
"dfs.client.use.datanode.hostname" : "true",
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.map.memory.mb" : 1024,
"mapreduce.reduce.memory.mb" : 1024
},
"rowFlushBoundary": 10000
}
}
}
@ -0,0 +1,114 @@
{
"type": "index_hadoop",
"spec": {
"dataSchema": {
"dataSource": "%%DATASOURCE%%",
"parser": {
"type": "hadoopyString",
"parseSpec": {
"format" : "json",
"timestampSpec": {
"column": "timestamp"
},
"dimensionsSpec": {
"dimensions": [
"page",
{"type": "string", "name": "language", "createBitmapIndex": false},
"user",
"unpatrolled",
"newPage",
"robot",
"anonymous",
"namespace",
"continent",
"country",
"region",
"city"
]
}
}
},
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
},
{
"name": "thetaSketch",
"type": "thetaSketch",
"fieldName": "user"
},
{
"name": "quantilesDoublesSketch",
"type": "quantilesDoublesSketch",
"fieldName": "delta"
},
{
"name": "HLLSketchBuild",
"type": "HLLSketchBuild",
"fieldName": "user"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-02" ]
}
},
"ioConfig": {
"type": "hadoop",
"inputSpec": {
"type": "static",
"paths": "%%INPUT_PATHS%%"
}
},
"tuningConfig": {
"type": "hadoop",
"partitionsSpec": {
"assumeGrouped": true,
"targetPartitionSize": 75000,
"type": "hashed"
},
"jobProperties": {
"fs.permissions.umask-mode": "022",
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
"dfs.datanode.address" : "druid-it-hadoop",
"dfs.client.use.datanode.hostname" : "true",
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Daws.region=%%AWS_REGION%%",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Daws.region=%%AWS_REGION%%",
"mapreduce.map.memory.mb" : 1024,
"mapreduce.reduce.memory.mb" : 1024,
"fs.s3.awsAccessKeyId" : "%%AWS_ACCESS_KEY%%",
"fs.s3.awsSecretAccessKey" : "%%AWS_SECRET_KEY%%",
"fs.s3.impl" : "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
"fs.s3n.awsAccessKeyId" : "%%AWS_ACCESS_KEY%%",
"fs.s3n.awsSecretAccessKey" : "%%AWS_SECRET_KEY%%",
"fs.s3n.impl" : "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
"fs.s3a.access.key" : "%%AWS_ACCESS_KEY%%",
"fs.s3a.secret.key" : "%%AWS_SECRET_KEY%%",
"fs.s3a.impl" : "org.apache.hadoop.fs.s3a.S3AFileSystem"
},
"rowFlushBoundary": 10000
}
}
}
@ -28,7 +28,6 @@
   <test name="AllSerializedTests">
     <packages>
       <package name="org.apache.druid.tests.*">
-        <exclude name="org.apache.druid.tests.hadoop"/>
         <exclude name="org.apache.druid.tests.parallelized"/>
       </package>
     </packages>