mirror of https://github.com/apache/druid.git
More Hadoop integration tests (#9714)
* More Hadoop integration tests
* Add missing s3 instructions
* Address PR comments
* Address PR comments
* PR comments
* Fix typo
This commit is contained in:
parent
c61365c1e0
commit
61295bd002
@ -373,7 +373,7 @@ jobs:
name: "(Compile=openjdk8, Run=openjdk8) other integration test"
jdk: openjdk8
services: *integration_test_services
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format' JVM_RUNTIME='-Djvm.runtime=8'
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=8'
script: *run_integration_test
after_failure: *integration_test_diags
# END - Integration tests for Compile with Java 8 and Run with Java 8

@ -407,7 +407,7 @@ jobs:
- <<: *integration_tests
name: "(Compile=openjdk8, Run=openjdk11) other integration test"
jdk: openjdk8
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format' JVM_RUNTIME='-Djvm.runtime=11'
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=11'
# END - Integration tests for Compile with Java 8 and Run with Java 11

- name: "security vulnerabilities"
@ -214,31 +214,33 @@ of the integration test run discussed above. This is because druid
test clusters might not, in general, have access to hadoop.
This also applies to integration tests that use Hadoop HDFS as an inputSource or as deep storage.
To run integration tests that use Hadoop, you will have to run a Hadoop cluster. This can be done in two ways:
1) Run your own Druid + Hadoop cluster and specified Hadoop configs in the configuration file (CONFIG_FILE).
2) Run Druid Docker test clusters with Hadoop container by passing -Dstart.hadoop.docker=true to the mvn command.
1) Run Druid Docker test clusters with the Hadoop container by passing -Dstart.hadoop.docker=true to the mvn command.
2) Run your own Druid + Hadoop cluster and specify Hadoop configs in the configuration file (CONFIG_FILE).

Currently, hdfs-deep-storage and other <cloud>-deep-storage integration test groups can only be run with
Druid Docker test clusters by passing -Dstart.hadoop.docker=true to start the Hadoop container.
You will also have to provide -Doverride.config.path=<PATH_TO_FILE> with your Druid's Hadoop configs set.
See the integration-tests/docker/environment-configs/override-examples/hdfs directory for an example.
Note that if the integration test you are running also uses other cloud extensions (S3, Azure, GCS), additional
credentials/configs may need to be set in the same file as your Druid's Hadoop configs.

Currently, ITHadoopIndexTest can only be run with your own Druid + Hadoop cluster by following the below steps:
Create a directory called batchHadoop1 in the hadoop file system
(anywhere you want) and put batch_hadoop.data (integration-tests/src/test/resources/hadoop/batch_hadoop.data)
into that directory (as its only file).
If you are running ITHadoopIndexTest with your own Druid + Hadoop cluster, please follow the steps below:
- Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
located in integration-tests/src/test/resources/data/batch_index/json to your HDFS at /batch_index/json/
- Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/tsv to your HDFS
at /batch_index/tsv/
If using the Docker-based Hadoop container, the steps above are automatically done by the integration tests.

Add this keyword to the configuration file (see above):
When running the Hadoop tests, you must set `-Dextra.datasource.name.suffix=''`, due to https://github.com/apache/druid/issues/9788.

Run the test using mvn (using the bundled Docker-based Hadoop cluster):
```
"hadoopTestDir": "<name_of_dir_containing_batchHadoop1>"
mvn verify -P integration-tests -Dit.test=ITHadoopIndexTest -Dstart.hadoop.docker=true -Doverride.config.path=docker/environment-configs/override-examples/hdfs -Dextra.datasource.name.suffix=''
```

Run the test using mvn:

Run the test using mvn (using config file for existing Hadoop cluster):
```
mvn verify -P int-tests-config-file -Dit.test=ITHadoopIndexTest
mvn verify -P int-tests-config-file -Dit.test=ITHadoopIndexTest -Dextra.datasource.name.suffix=''
```

In some test environments, the machine where the tests need to be executed
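For orientation, a minimal override file and invocation for the Docker-based Hadoop setup might look like the sketch below. The property names are taken from the override-examples added in this commit; the file path and the exact extension list are illustrative assumptions, not the canonical setup.
```
# Sketch only: property names come from the override-examples in this commit;
# /tmp/hdfs-overrides and the loadList contents are assumptions for illustration.
cat > /tmp/hdfs-overrides <<'EOF'
druid_storage_type=hdfs
druid_storage_storageDirectory=/druid/segments
druid_extensions_loadList=["druid-hdfs-storage"]
EOF

# Select the hdfs-deep-storage group against the bundled Hadoop container.
mvn verify -P integration-tests -Dgroups=hdfs-deep-storage -Dstart.hadoop.docker=true \
  -Doverride.config.path=/tmp/hdfs-overrides -Dextra.datasource.name.suffix=''
```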
@ -0,0 +1,31 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
|
||||
#
|
||||
# Example of override config file to provide.
|
||||
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
|
||||
#
|
||||
druid_storage_type=azure
|
||||
druid_azure_account=<OVERRIDE_THIS>
|
||||
druid_azure_key=<OVERRIDE_THIS>
|
||||
druid_azure_container=<OVERRIDE_THIS>
|
||||
|
||||
druid_extensions_loadList=["druid-azure-extensions","druid-hdfs-storage"]
|
||||
|
||||
druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"]
|
|
@ -0,0 +1,34 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
|
||||
#
|
||||
# Example of override config file to provide.
|
||||
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
|
||||
#
|
||||
druid_storage_type=hdfs
|
||||
druid_storage_storageDirectory=/druid/segments
|
||||
|
||||
druid_extensions_loadList=["druid-azure-extensions","druid-hdfs-storage"]
|
||||
|
||||
# Not used since we have HDFS deep storage, but the Druid Azure extension requires these to be defined
|
||||
druid_azure_account=<OVERRIDE_THIS>
|
||||
druid_azure_key=<OVERRIDE_THIS>
|
||||
druid_azure_container=<OVERRIDE_THIS>
|
||||
|
||||
druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"]
|
|
@ -0,0 +1,30 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
|
||||
#
|
||||
# Example of override config file to provide.
|
||||
# Please replace <OVERRIDE_THIS> and <YOUR_GOOGLE_CREDENTIALS_FILE_NAME> with your cloud configs/credentials
|
||||
#
|
||||
druid_storage_type=google
|
||||
druid_google_bucket=<OVERRIDE_THIS>
|
||||
druid_google_prefix=<OVERRIDE_THIS>
|
||||
|
||||
druid_extensions_loadList=["druid-google-extensions","druid-hdfs-storage"]
|
||||
|
||||
GOOGLE_APPLICATION_CREDENTIALS=/shared/docker/credentials/<YOUR_GOOGLE_CREDENTIALS_FILE_NAME>
|
|
@ -0,0 +1,30 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
|
||||
#
|
||||
# Example of override config file to provide.
|
||||
# Please replace <OVERRIDE_THIS> and <YOUR_GOOGLE_CREDENTIALS_FILE_NAME> with your cloud configs/credentials
|
||||
#
|
||||
druid_storage_type=hdfs
|
||||
druid_storage_storageDirectory=/druid/segments
|
||||
|
||||
druid_extensions_loadList=["druid-google-extensions","druid-hdfs-storage"]
|
||||
|
||||
GOOGLE_APPLICATION_CREDENTIALS=/shared/docker/credentials/<YOUR_GOOGLE_CREDENTIALS_FILE_NAME>
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
|
||||
#
|
||||
# Example of override config file to provide.
|
||||
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
|
||||
#
|
||||
druid_s3_accessKey=<OVERRIDE_THIS>
|
||||
druid_s3_secretKey=<OVERRIDE_THIS>
|
||||
|
||||
druid_storage_type=hdfs
|
||||
druid_storage_storageDirectory=/druid/segments
|
||||
|
||||
AWS_REGION=<OVERRIDE_THIS>
|
||||
|
||||
druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"]
|
||||
|
||||
druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"]
|
|
@ -0,0 +1,35 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
|
||||
#
|
||||
# Example of override config file to provide.
|
||||
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
|
||||
#
|
||||
druid_s3_accessKey=<OVERRIDE_THIS>
|
||||
druid_s3_secretKey=<OVERRIDE_THIS>
|
||||
|
||||
druid_storage_type=s3
|
||||
druid_storage_bucket=<OVERRIDE_THIS>
|
||||
druid_storage_baseKey=<OVERRIDE_THIS>
|
||||
|
||||
AWS_REGION=<OVERRIDE_THIS>
|
||||
|
||||
druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"]
|
||||
|
||||
druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"]
|
|
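A filled-in copy of the example above can then be used to run the new hadoop-s3-to-s3-deep-storage group, roughly as in the following sketch. The -Dgroups selection and credential flags follow the conventions documented in the integration-tests README; all bracketed values are placeholders.
```
# Sketch only: runs the hadoop-s3-to-s3-deep-storage group against the bundled
# Hadoop container, using a copy of the example file above with real values.
mvn verify -P integration-tests -Dgroups=hadoop-s3-to-s3-deep-storage \
  -Dstart.hadoop.docker=true \
  -Doverride.config.path=<path_to_your_filled_in_override_file> \
  -Ddruid.s3.accessKey=<access_key> -Ddruid.s3.secretKey=<secret_key> \
  -Dextra.datasource.name.suffix=''
```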
@ -27,3 +27,5 @@ SERVICE_DRUID_JAVA_OPTS=-server -Xmx128m -XX:+UseG1GC -agentlib:jdwp=transport=d
druid_auth_basic_common_cacheDirectory=/tmp/authCache/router
druid_sql_avatica_enable=true
druid_server_https_crlPath=/tls/revocations.crl
druid_router_managementProxy_enabled=true
@ -32,5 +32,3 @@ cd client_tls
../docker/tls/generate-to-be-revoked-client-cert.sh
../docker/tls/generate-untrusted-root-client-cert.sh
../docker/tls/generate-valid-intermediate-client-cert.sh
@ -118,7 +118,7 @@ rm -rf certs.seq
|
|||
echo 11111115 > certs.seq
|
||||
|
||||
# Generate a client certificate for this machine
|
||||
openssl genrsa -out expired_client.key 1024 -sha256
|
||||
openssl genrsa -out expired_client.key 1024
|
||||
openssl req -new -out expired_client.csr -key expired_client.key -reqexts req_ext -config expired_csr.conf
|
||||
openssl ca -batch -config root_for_expired_client.cnf -policy policy_loose -out expired_client.pem -outdir . -startdate 101010000000Z -enddate 101011000000Z -extensions v3_ca -cert root.pem -keyfile root.key -infiles expired_client.csr
|
||||
|
||||
|
|
|
@ -50,7 +50,7 @@ DNS.2 = localhost
|
|||
EOT
|
||||
|
||||
# Generate a client certificate for this machine
|
||||
openssl genrsa -out client.key 1024 -sha256
|
||||
openssl genrsa -out client.key 1024
|
||||
openssl req -new -out client.csr -key client.key -reqexts req_ext -config csr.conf
|
||||
openssl x509 -req -days 3650 -in client.csr -CA root.pem -CAkey root.key -set_serial 0x11111111 -out client.pem -sha256 -extfile csr.conf -extensions req_ext
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ DNS.1 = thisisprobablywrongtoo
|
|||
|
||||
EOT
|
||||
|
||||
openssl genrsa -out invalid_hostname_client.key 1024 -sha256
|
||||
openssl genrsa -out invalid_hostname_client.key 1024
|
||||
openssl req -new -out invalid_hostname_client.csr -key invalid_hostname_client.key -reqexts req_ext -config invalid_hostname_csr.conf
|
||||
openssl x509 -req -days 3650 -in invalid_hostname_client.csr -CA root.pem -CAkey root.key -set_serial 0x11111112 -out invalid_hostname_client.pem -sha256 -extfile invalid_hostname_csr.conf -extensions req_ext
|
||||
|
||||
|
|
|
@ -45,7 +45,7 @@ IP.1 = 9.9.9.9
|
|||
EOT
|
||||
|
||||
# Generate a bad intermediate certificate
|
||||
openssl genrsa -out invalid_ca_intermediate.key 1024 -sha256
|
||||
openssl genrsa -out invalid_ca_intermediate.key 1024
|
||||
openssl req -new -out invalid_ca_intermediate.csr -key invalid_ca_intermediate.key -reqexts req_ext -config invalid_ca_intermediate.conf
|
||||
openssl x509 -req -days 3650 -in invalid_ca_intermediate.csr -CA root.pem -CAkey root.key -set_serial 0x33333331 -out invalid_ca_intermediate.pem -sha256 -extfile invalid_ca_intermediate.conf -extensions req_ext
|
||||
|
||||
|
@ -81,7 +81,7 @@ DNS.2 = localhost
|
|||
EOT
|
||||
|
||||
# Generate a client certificate for this machine
|
||||
openssl genrsa -out invalid_ca_client.key 1024 -sha256
|
||||
openssl genrsa -out invalid_ca_client.key 1024
|
||||
openssl req -new -out invalid_ca_client.csr -key invalid_ca_client.key -reqexts req_ext -config invalid_ca_client.conf
|
||||
openssl x509 -req -days 3650 -in invalid_ca_client.csr -CA invalid_ca_intermediate.pem -CAkey invalid_ca_intermediate.key -set_serial 0x33333333 -out invalid_ca_client.pem -sha256 -extfile invalid_ca_client.conf -extensions req_ext
|
||||
|
||||
|
|
|
@ -52,9 +52,9 @@ DNS.2 = localhost
|
|||
EOT
|
||||
|
||||
# Generate a client certificate for this machine
|
||||
openssl genrsa -out revoked_client.key 1024 -sha256
|
||||
openssl genrsa -out revoked_client.key 1024
|
||||
openssl req -new -out revoked_client.csr -key revoked_client.key -reqexts req_ext -config revoked_csr.conf
|
||||
openssl x509 -req -days 3650 -in revoked_client.csr -CA root.pem -CAkey root.key -set_serial 0x11111113 -out revoked_client.pem -sha256 -extfile csr.conf -extensions req_ext
|
||||
openssl x509 -req -days 3650 -in revoked_client.csr -CA root.pem -CAkey root.key -set_serial 0x11111113 -out revoked_client.pem -sha256 -extfile revoked_csr.conf -extensions req_ext
|
||||
|
||||
# Create a Java keystore containing the generated certificate
|
||||
openssl pkcs12 -export -in revoked_client.pem -inkey revoked_client.key -out revoked_client.p12 -name revoked_druid -CAfile root.pem -caname druid-it-root -password pass:druid123
|
||||
|
|
|
@ -50,7 +50,7 @@ DNS.2 = localhost
|
|||
EOT
|
||||
|
||||
# Generate a client certificate for this machine
|
||||
openssl genrsa -out client_another_root.key 1024 -sha256
|
||||
openssl genrsa -out client_another_root.key 1024
|
||||
openssl req -new -out client_another_root.csr -key client_another_root.key -reqexts req_ext -config csr_another_root.conf
|
||||
openssl x509 -req -days 3650 -in client_another_root.csr -CA untrusted_root.pem -CAkey untrusted_root.key -set_serial 0x11111114 -out client_another_root.pem -sha256 -extfile csr_another_root.conf -extensions req_ext
|
||||
|
||||
|
|
|
@ -45,7 +45,7 @@ IP.1 = 9.9.9.9
|
|||
EOT
|
||||
|
||||
# Generate an intermediate certificate
|
||||
openssl genrsa -out ca_intermediate.key 1024 -sha256
|
||||
openssl genrsa -out ca_intermediate.key 1024
|
||||
openssl req -new -out ca_intermediate.csr -key ca_intermediate.key -reqexts req_ext -config ca_intermediate.conf
|
||||
openssl x509 -req -days 3650 -in ca_intermediate.csr -CA root.pem -CAkey root.key -set_serial 0x33333332 -out ca_intermediate.pem -sha256 -extfile ca_intermediate.conf -extensions req_ext
|
||||
|
||||
|
@ -81,7 +81,7 @@ DNS.2 = localhost
|
|||
EOT
|
||||
|
||||
# Generate a client certificate for this machine
|
||||
openssl genrsa -out intermediate_ca_client.key 1024 -sha256
|
||||
openssl genrsa -out intermediate_ca_client.key 1024
|
||||
openssl req -new -out intermediate_ca_client.csr -key intermediate_ca_client.key -reqexts req_ext -config intermediate_ca_client.conf
|
||||
openssl x509 -req -days 3650 -in intermediate_ca_client.csr -CA ca_intermediate.pem -CAkey ca_intermediate.key -set_serial 0x33333333 -out intermediate_ca_client.pem -sha256 -extfile intermediate_ca_client.conf -extensions req_ext
|
||||
|
||||
|
|
|
@ -365,6 +365,7 @@
<skip.stop.docker>false</skip.stop.docker>
<override.config.path></override.config.path>
<resource.file.dir.path></resource.file.dir.path>
<extra.datasource.name.suffix>\ Россия\ 한국\ 中国!?</extra.datasource.name.suffix>
</properties>
<build>
<plugins>

@ -430,7 +431,7 @@
-Dfile.encoding=UTF-8
-Ddruid.test.config.dockerIp=${env.DOCKER_IP}
-Ddruid.test.config.hadoopDir=${env.HADOOP_DIR}
-Ddruid.test.config.extraDatasourceNameSuffix=\ Россия\ 한국\ 中国!?
-Ddruid.test.config.extraDatasourceNameSuffix=${extra.datasource.name.suffix}
-Ddruid.zk.service.host=${env.DOCKER_IP}
-Ddruid.client.https.trustStorePath=client_tls/truststore.jks
-Ddruid.client.https.trustStorePassword=druid123
@ -80,17 +80,18 @@ if [ -n "$DRUID_INTEGRATION_TEST_SKIP_START_DOCKER" ] && [ "$DRUID_INTEGRATION_T
# For druid-kinesis-indexing-service
mkdir -p $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
mv $SHARED_DIR/docker/lib/druid-kinesis-indexing-service-* $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
$ For druid-parquet-extensions
# For druid-parquet-extensions
mkdir -p $SHARED_DIR/docker/extensions/druid-parquet-extensions
mv $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions
$ For druid-orc-extensions
# For druid-orc-extensions
mkdir -p $SHARED_DIR/docker/extensions/druid-orc-extensions
mv $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions

# Pull Hadoop dependency if needed
if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]
then
java -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client:2.8.5 -h org.apache.hadoop:hadoop-aws:2.8.5
java -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client:2.8.5 -h org.apache.hadoop:hadoop-aws:2.8.5 -h org.apache.hadoop:hadoop-azure:2.8.5
curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-latest.jar --output $SHARED_DIR/docker/lib/gcs-connector-hadoop2-latest.jar
fi

# install logging config
@ -243,4 +244,4 @@ fi
|
|||
|
||||
# Start Router with custom TLS cert checkers
|
||||
docker run -d --privileged --net druid-it-net --ip 172.172.172.12 ${COMMON_ENV} ${ROUTER_CUSTOM_CHECK_TLS_ENV} ${OVERRIDE_ENV} --hostname druid-router-custom-check-tls --name druid-router-custom-check-tls -p 5003:5003 -p 8891:8891 -p 9091:9091 -v $SHARED_DIR:/shared -v $SERVICE_SUPERVISORDS_DIR/druid.conf:$SUPERVISORDIR/druid.conf --link druid-zookeeper-kafka:druid-zookeeper-kafka --link druid-coordinator:druid-coordinator --link druid-broker:druid-broker druid/cluster
|
||||
}
|
||||
}
|
|
@ -57,6 +57,9 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
|
|||
private String password;
|
||||
private String cloudBucket;
|
||||
private String cloudPath;
|
||||
private String cloudRegion;
|
||||
private String hadoopGcsCredentialsPath;
|
||||
private String azureKey;
|
||||
private String streamEndpoint;
|
||||
|
||||
@JsonCreator
|
||||
|
@ -193,6 +196,9 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
|
|||
|
||||
cloudBucket = props.get("cloud_bucket");
|
||||
cloudPath = props.get("cloud_path");
|
||||
cloudRegion = props.get("cloud_region");
|
||||
hadoopGcsCredentialsPath = props.get("hadoopGcsCredentialsPath");
|
||||
azureKey = props.get("azureKey");
|
||||
streamEndpoint = props.get("stream_endpoint");
|
||||
|
||||
LOG.info("router: [%s], [%s]", routerUrl, routerTLSUrl);
|
||||
|
@ -356,6 +362,24 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
|
|||
return cloudPath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCloudRegion()
|
||||
{
|
||||
return cloudRegion;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAzureKey()
|
||||
{
|
||||
return azureKey;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHadoopGcsCredentialsPath()
|
||||
{
|
||||
return hadoopGcsCredentialsPath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getStreamEndpoint()
|
||||
{
|
||||
|
|
|
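The new keys read above map to entries in the JSON configuration file used with the int-tests-config-file profile. A hypothetical fragment is sketched below: the key names mirror the props.get(...) calls in this change, the values are placeholders, and the file's other required entries are omitted.
```
# Display-only sketch of the relevant config-file entries (not a complete file).
cat <<'EOF'
  "cloud_bucket": "<your-bucket-or-azure-account>",
  "cloud_path": "<your-path-or-azure-container>",
  "cloud_region": "<your-region>",
  "hadoopGcsCredentialsPath": "<google-credentials-path-inside-hadoop>",
  "azureKey": "<your-azure-key>"
EOF
```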
@ -33,10 +33,6 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
|
|||
@NotNull
|
||||
private String dockerIp;
|
||||
|
||||
@JsonProperty
|
||||
@NotNull
|
||||
private String hadoopDir;
|
||||
|
||||
@JsonProperty
|
||||
private String extraDatasourceNameSuffix = "";
|
||||
|
||||
|
@ -46,6 +42,15 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
|
|||
@JsonProperty
|
||||
private String cloudBucket;
|
||||
|
||||
@JsonProperty
|
||||
private String cloudRegion;
|
||||
|
||||
@JsonProperty
|
||||
private String hadoopGcsCredentialsPath;
|
||||
|
||||
@JsonProperty
|
||||
private String azureKey;
|
||||
|
||||
@JsonProperty
|
||||
private String streamEndpoint;
|
||||
|
||||
|
@ -185,9 +190,6 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
@Override
public String getProperty(String prop)
{
if ("hadoopTestDir".equals(prop)) {
return hadoopDir;
}
throw new UnsupportedOperationException("DockerConfigProvider does not support property " + prop);
}
@ -233,6 +235,24 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
|
|||
return cloudPath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCloudRegion()
|
||||
{
|
||||
return cloudRegion;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAzureKey()
|
||||
{
|
||||
return azureKey;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHadoopGcsCredentialsPath()
|
||||
{
|
||||
return hadoopGcsCredentialsPath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getStreamEndpoint()
|
||||
{
|
||||
|
|
|
@ -89,5 +89,11 @@ public interface IntegrationTestingConfig
String getCloudPath();

String getCloudRegion();

String getAzureKey();

String getHadoopGcsCredentialsPath();

String getStreamEndpoint();
}
@ -27,8 +27,6 @@ public class TestNGGroup
{
public static final String BATCH_INDEX = "batch-index";

public static final String HADOOP_INDEX = "hadoop-index";

public static final String KAFKA_INDEX = "kafka-index";

public static final String KAFKA_INDEX_SLOW = "kafka-index-slow";

@ -86,6 +84,15 @@ public class TestNGGroup
*/
public static final String HDFS_DEEP_STORAGE = "hdfs-deep-storage";

public static final String HADOOP_S3_TO_S3 = "hadoop-s3-to-s3-deep-storage";
public static final String HADOOP_S3_TO_HDFS = "hadoop-s3-to-hdfs-deep-storage";

public static final String HADOOP_AZURE_TO_AZURE = "hadoop-azure-to-azure-deep-storage";
public static final String HADOOP_AZURE_TO_HDFS = "hadoop-azure-to-hdfs-deep-storage";

public static final String HADOOP_GCS_TO_GCS = "hadoop-gcs-to-gcs-deep-storage";
public static final String HADOOP_GCS_TO_HDFS = "hadoop-gcs-to-hdfs-deep-storage";

/**
* This group is not part of CI. To run this group, s3 configs/credentials for your s3 must be provided in a file.
* The path of the file must then be pass to mvn with -Doverride.config.path=<PATH_TO_FILE>
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.hadoop;
|
||||
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
|
||||
public abstract class AbstractAzureInputHadoopIndexTest extends AbstractITBatchIndexTest
|
||||
{
|
||||
private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_azure_input_index_task.json";
|
||||
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||
|
||||
void doTest() throws Exception
|
||||
{
|
||||
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
|
||||
try (
|
||||
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
|
||||
) {
|
||||
final Function<String, String> azurePropsTransform = spec -> {
|
||||
try {
|
||||
String path = StringUtils.format(
|
||||
"wasbs://%s@%s.blob.core.windows.net/",
|
||||
config.getCloudPath(),
|
||||
config.getCloudBucket()
|
||||
);
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_PATHS%%",
|
||||
path
|
||||
);
|
||||
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%AZURE_ACCOUNT%%",
|
||||
config.getCloudBucket()
|
||||
);
|
||||
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%AZURE_KEY%%",
|
||||
config.getAzureKey()
|
||||
);
|
||||
|
||||
return spec;
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
};
|
||||
|
||||
doIndexTest(
|
||||
indexDatasource,
|
||||
INDEX_TASK,
|
||||
azurePropsTransform,
|
||||
INDEX_QUERIES_RESOURCE,
|
||||
false,
|
||||
true,
|
||||
true
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.hadoop;
|
||||
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
|
||||
public abstract class AbstractGcsInputHadoopIndexTest extends AbstractITBatchIndexTest
|
||||
{
|
||||
private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_gcs_input_index_task.json";
|
||||
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||
|
||||
void doTest() throws Exception
|
||||
{
|
||||
final String indexDatasource = "wikipedia_hadoop_index_test_" + UUID.randomUUID();
|
||||
try (
|
||||
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
|
||||
) {
|
||||
final Function<String, String> gcsPropsTransform = spec -> {
|
||||
try {
|
||||
|
||||
String path = StringUtils.format(
|
||||
"gs://%s/%s",
|
||||
config.getCloudBucket(),
|
||||
config.getCloudPath()
|
||||
);
|
||||
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_PATHS%%",
|
||||
path
|
||||
);
|
||||
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%GCS_KEYFILE_PATH%%",
|
||||
config.getHadoopGcsCredentialsPath()
|
||||
);
|
||||
|
||||
return spec;
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
};
|
||||
|
||||
doIndexTest(
|
||||
indexDatasource,
|
||||
INDEX_TASK,
|
||||
gcsPropsTransform,
|
||||
INDEX_QUERIES_RESOURCE,
|
||||
false,
|
||||
true,
|
||||
true
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.hadoop;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import org.apache.druid.common.aws.AWSCredentialsConfig;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
|
||||
public abstract class AbstractS3InputHadoopIndexTest extends AbstractITBatchIndexTest
|
||||
{
|
||||
@Inject
|
||||
protected AWSCredentialsConfig awsCredentialsConfig;
|
||||
|
||||
private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_s3_input_index_task.json";
|
||||
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||
|
||||
void doTest() throws Exception
|
||||
{
|
||||
final String indexDatasource = "wikipedia_hadoop_index_test_" + UUID.randomUUID();
|
||||
try (
|
||||
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
|
||||
) {
|
||||
final Function<String, String> s3PropsTransform = spec -> {
|
||||
try {
|
||||
|
||||
String path = StringUtils.format(
|
||||
"s3a://%s/%s",
|
||||
config.getCloudBucket(),
|
||||
config.getCloudPath()
|
||||
);
|
||||
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_PATHS%%",
|
||||
path
|
||||
);
|
||||
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%AWS_ACCESS_KEY%%",
|
||||
awsCredentialsConfig.getAccessKey().getPassword()
|
||||
);
|
||||
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%AWS_SECRET_KEY%%",
|
||||
awsCredentialsConfig.getSecretKey().getPassword()
|
||||
);
|
||||
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%AWS_REGION%%",
|
||||
config.getCloudRegion()
|
||||
);
|
||||
|
||||
return spec;
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
};
|
||||
|
||||
doIndexTest(
|
||||
indexDatasource,
|
||||
INDEX_TASK,
|
||||
s3PropsTransform,
|
||||
INDEX_QUERIES_RESOURCE,
|
||||
false,
|
||||
true,
|
||||
true
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.hadoop;
|
||||
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the account, container, and key for your data.
|
||||
* This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath,
|
||||
* and -Ddruid.test.config.azureKey.
|
||||
* - `cloudBucket` should be set to your Azure account name
|
||||
* - `cloudPath` should be set to the Azure container where the input data resides
|
||||
* - `azureKey` should be set to an Azure access key that can read from the container above
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index/json to the Azure container set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure+Hadoop configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure for env vars to provide.
|
||||
* 4) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
|
||||
*/
|
||||
@Test(groups = TestNGGroup.HADOOP_AZURE_TO_AZURE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITAzureInputToAzureHadoopIndexTest extends AbstractAzureInputHadoopIndexTest
|
||||
{
|
||||
public void testGcsIndexData() throws Exception
|
||||
{
|
||||
doTest();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.hadoop;
|
||||
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the account, container, and key for your data.
|
||||
* This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath,
|
||||
* and -Ddruid.test.config.azureKey.
|
||||
* - `cloudBucket` should be set to your Azure account name
|
||||
* - `cloudPath` should be set to the Azure container where the input data resides
|
||||
* - `azureKey` should be set to an Azure access key that can read from the container above
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index/json to the Azure container set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure+Hadoop configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs for env vars to provide.
|
||||
* 4) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
|
||||
*/
|
||||
@Test(groups = TestNGGroup.HADOOP_AZURE_TO_HDFS)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITAzureInputToHdfsHadoopIndexTest extends AbstractAzureInputHadoopIndexTest
|
||||
{
|
||||
public void testGcsIndexData() throws Exception
|
||||
{
|
||||
doTest();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.hadoop;
|
||||
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2) Set -Ddruid.test.config.hadoopGcsCredentialsPath to the location of your Google credentials file as it
|
||||
* exists within the Hadoop cluster that will ingest the data. The credentials file can be placed in the
|
||||
* shared folder used by the integration test containers if running the Docker-based Hadoop container,
|
||||
* in which case this property can be set to /shared/<path_of_your_credentials_file>
|
||||
* 3) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file
|
||||
* 4) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
|
||||
* 5) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_gcs for env vars to provide.
|
||||
* 6) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
|
||||
*/
|
||||
@Test(groups = TestNGGroup.HADOOP_GCS_TO_GCS)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITGcsInputToGcsHadoopIndexTest extends AbstractGcsInputHadoopIndexTest
|
||||
{
|
||||
public void testGcsIndexData() throws Exception
|
||||
{
|
||||
doTest();
|
||||
}
|
||||
}
|
|
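Putting the javadoc steps above together, an invocation for this test might look like the following sketch. Every bracketed value is a placeholder, and the override path is the example directory named in step 5.
```
# Sketch only: follows steps 1-6 from the ITGcsInputToGcsHadoopIndexTest javadoc.
mvn verify -P integration-tests -Dit.test=ITGcsInputToGcsHadoopIndexTest \
  -Dstart.hadoop.docker=true \
  -Ddruid.test.config.cloudBucket=<your_bucket> \
  -Ddruid.test.config.cloudPath=<your_path> \
  -Ddruid.test.config.hadoopGcsCredentialsPath=/shared/<your_credentials_file> \
  -Dresource.file.dir.path=<folder_containing_GOOGLE_APPLICATION_CREDENTIALS_file> \
  -Doverride.config.path=docker/environment-configs/override-examples/hadoop/gcs_to_gcs \
  -Dextra.datasource.name.suffix=''
```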
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.hadoop;
|
||||
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2. Set -Ddruid.test.config.hadoopGcsCredentialsPath to the location of your Google credentials file as it
|
||||
* exists within the Hadoop cluster that will ingest the data. The credentials file can be placed in the
|
||||
* shared folder used by the integration test containers if running the Docker-based Hadoop container,
|
||||
* in which case this property can be set to /shared/<path_of_your_credentials_file>
|
||||
* 3) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file
|
||||
* 4) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
|
||||
* 5) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_hdfs for env vars to provide.
|
||||
* 6) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
|
||||
*/
|
||||
@Test(groups = TestNGGroup.HADOOP_GCS_TO_HDFS)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITGcsInputToHdfsHadoopIndexTest extends AbstractGcsInputHadoopIndexTest
|
||||
{
|
||||
public void testGcsIndexData() throws Exception
|
||||
{
|
||||
doTest();
|
||||
}
|
||||
}
|
|
@ -19,87 +19,150 @@
|
|||
|
||||
package org.apache.druid.tests.hadoop;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec;
|
||||
import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
|
||||
import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.java.util.common.logger.Logger;
|
||||
import org.apache.druid.testing.IntegrationTestingConfig;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.testing.utils.ITRetryUtil;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.apache.druid.tests.indexer.AbstractIndexerTest;
|
||||
import org.testng.annotations.AfterClass;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
@Test(groups = TestNGGroup.HADOOP_INDEX)
|
||||
import java.io.Closeable;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index/json to your HDFS at /batch_index/json/
|
||||
* If using the Docker-based Hadoop container, this is automatically done by the integration tests.
|
||||
* 2) Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/tsv to your HDFS
|
||||
* at /batch_index/tsv/
|
||||
* If using the Docker-based Hadoop container, this is automatically done by the integration tests.
|
||||
* 2) Provide -Doverride.config.path=<PATH_TO_FILE> with HDFS configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
|
||||
* 3) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
|
||||
*/
|
||||
@Test(groups = TestNGGroup.HDFS_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITHadoopIndexTest extends AbstractIndexerTest
|
||||
public class ITHadoopIndexTest extends AbstractITBatchIndexTest
|
||||
{
|
||||
private static final Logger LOG = new Logger(ITHadoopIndexTest.class);
|
||||
|
||||
private static final String BATCH_TASK = "/hadoop/batch_hadoop_indexer.json";
|
||||
private static final String BATCH_QUERIES_RESOURCE = "/hadoop/batch_hadoop_queries.json";
|
||||
private static final String BATCH_DATASOURCE = "batchHadoop";
|
||||
private boolean dataLoaded = false;
|
||||
|
||||
@Inject
|
||||
private IntegrationTestingConfig config;
|
||||
private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_index_task.json";
|
||||
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||
private static final String INDEX_DATASOURCE = "wikipedia_hadoop_index_test";
|
||||
|
||||
@BeforeClass
|
||||
public void beforeClass()
|
||||
private static final String REINDEX_TASK = "/hadoop/wikipedia_hadoop_reindex_task.json";
|
||||
private static final String REINDEX_QUERIES_RESOURCE = "/indexer/wikipedia_reindex_queries.json";
|
||||
private static final String REINDEX_DATASOURCE = "wikipedia_hadoop_reindex_test";
|
||||
|
||||
@DataProvider
|
||||
public static Object[][] resources()
|
||||
{
|
||||
loadData(config.getProperty("hadoopTestDir") + "/batchHadoop1");
|
||||
dataLoaded = true;
|
||||
return new Object[][]{
|
||||
{new HashedPartitionsSpec(3, null, null)},
|
||||
{new HashedPartitionsSpec(null, 3, ImmutableList.of("page"))},
|
||||
{new HashedPartitionsSpec(null, 3, ImmutableList.of("page", "user"))},
|
||||
{new SingleDimensionPartitionsSpec(1000, null, null, false)},
|
||||
{new SingleDimensionPartitionsSpec(1000, null, "page", false)},
|
||||
{new SingleDimensionPartitionsSpec(1000, null, null, true)},
|
||||
|
||||
//{new HashedPartitionsSpec(null, 3, null)} // this results in a bug where the segments have 0 rows
|
||||
};
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHadoopIndex() throws Exception
|
||||
public void testLegacyITHadoopIndexTest() throws Exception
|
||||
{
|
||||
queryHelper.testQueriesFromFile(BATCH_QUERIES_RESOURCE, 2);
|
||||
}
|
||||
try (
|
||||
final Closeable ignored0 = unloader(BATCH_DATASOURCE + config.getExtraDatasourceNameSuffix());
|
||||
) {
|
||||
final Function<String, String> specPathsTransform = spec -> {
|
||||
try {
|
||||
String path = "/batch_index/tsv";
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_PATHS%%",
|
||||
path
|
||||
);
|
||||
|
||||
private void loadData(String hadoopDir)
|
||||
{
|
||||
String indexerSpec;
|
||||
return spec;
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
LOG.info("indexerFile name: [%s]", BATCH_TASK);
|
||||
indexerSpec = getResourceAsString(BATCH_TASK);
|
||||
indexerSpec = StringUtils.replace(indexerSpec, "%%HADOOP_TEST_PATH%%", hadoopDir);
|
||||
}
|
||||
catch (Exception e) {
|
||||
LOG.error("could not read and modify indexer file: %s", e.getMessage());
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
try {
|
||||
final String taskID = indexer.submitTask(indexerSpec);
|
||||
LOG.info("TaskID for loading index task %s", taskID);
|
||||
indexer.waitUntilTaskCompletes(taskID, 10000, 120);
|
||||
ITRetryUtil.retryUntil(
|
||||
() -> coordinator.areSegmentsLoaded(BATCH_DATASOURCE),
|
||||
doIndexTest(
|
||||
BATCH_DATASOURCE,
|
||||
BATCH_TASK,
|
||||
specPathsTransform,
|
||||
BATCH_QUERIES_RESOURCE,
|
||||
false,
|
||||
true,
|
||||
20000,
|
||||
10,
|
||||
"Segment-Load-Task-" + taskID
|
||||
true
|
||||
);
|
||||
}
|
||||
catch (Exception e) {
|
||||
LOG.error("data could not be loaded: %s", e.getMessage());
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public void afterClass()
|
||||
@Test(dataProvider = "resources")
|
||||
public void testIndexData(DimensionBasedPartitionsSpec partitionsSpec) throws Exception
|
||||
{
|
||||
if (dataLoaded) {
|
||||
try {
|
||||
unloadAndKillData(BATCH_DATASOURCE);
|
||||
}
|
||||
catch (Exception e) {
|
||||
LOG.warn(e, "exception while removing segments");
|
||||
}
|
||||
String indexDatasource = INDEX_DATASOURCE + "_" + UUID.randomUUID();
|
||||
String reindexDatasource = REINDEX_DATASOURCE + "_" + UUID.randomUUID();
|
||||
|
||||
try (
|
||||
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
|
||||
final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix());
|
||||
) {
|
||||
final Function<String, String> specPathsTransform = spec -> {
|
||||
try {
|
||||
String path = "/batch_index/json";
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_PATHS%%",
|
||||
path
|
||||
);
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%PARTITIONS_SPEC%%",
|
||||
jsonMapper.writeValueAsString(partitionsSpec)
|
||||
);
|
||||
|
||||
return spec;
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
};
|
||||
|
||||
doIndexTest(
|
||||
indexDatasource,
|
||||
INDEX_TASK,
|
||||
specPathsTransform,
|
||||
INDEX_QUERIES_RESOURCE,
|
||||
false,
|
||||
true,
|
||||
true
|
||||
);
|
||||
|
||||
doReindexTest(
|
||||
indexDatasource,
|
||||
reindexDatasource,
|
||||
REINDEX_TASK,
|
||||
REINDEX_QUERIES_RESOURCE
|
||||
);
|
||||
}
|
||||
}
|
||||
}

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.tests.hadoop;

import org.apache.druid.testing.guice.DruidTestModuleFactory;
import org.apache.druid.tests.TestNGGroup;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;

/**
* IMPORTANT:
* To run this test, you must:
* 1) Set the bucket, path, and region for your data.
* This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath
* and -Ddruid.test.config.cloudRegion or setting "cloud_bucket","cloud_path", and "cloud_region" in the config file.
* 2) Set -Ddruid.s3.accessKey and -Ddruid.s3.secretKey when running the tests to your access/secret keys.
* 3) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
* 4) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials and hdfs deep storage configs set. See
* integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs for env vars to provide.
* 5) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
*/
@Test(groups = TestNGGroup.HADOOP_S3_TO_HDFS)
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITS3InputToHdfsHadoopIndexTest extends AbstractS3InputHadoopIndexTest
{
@Test()
public void testS3IndexData() throws Exception
{
doTest();
}
}
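
For reference, the five setup steps in the class comment above typically collapse into one Maven invocation. This is only a sketch: the bucket, path, region, key, and override-file values are placeholders, the verify goal and integration-tests profile are assumed from the integration-tests build setup rather than shown in this change, and the -Dgroups value matches the hadoop-s3-to-hdfs-deep-storage group name used elsewhere in this commit.

  mvn verify -P integration-tests \
    -Dgroups=hadoop-s3-to-hdfs-deep-storage \
    -Dstart.hadoop.docker=true \
    -Dextra.datasource.name.suffix='' \
    -Doverride.config.path=<PATH_TO_FILE> \
    -Ddruid.test.config.cloudBucket=<your-bucket> \
    -Ddruid.test.config.cloudPath=<your-path> \
    -Ddruid.test.config.cloudRegion=<your-region> \
    -Ddruid.s3.accessKey=<your-access-key> \
    -Ddruid.s3.secretKey=<your-secret-key>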

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.tests.hadoop;

import org.apache.druid.testing.guice.DruidTestModuleFactory;
import org.apache.druid.tests.TestNGGroup;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;

/**
* IMPORTANT:
* To run this test, you must:
* 1) Set the bucket, path, and region for your data.
* This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath
* and -Ddruid.test.config.cloudRegion or setting "cloud_bucket","cloud_path", and "cloud_region" in the config file.
* 2) Set -Ddruid.s3.accessKey and -Ddruid.s3.secretKey when running the tests to your access/secret keys.
* 3) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
* 4) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials and s3 deep storage configs set. See
* integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 for env vars to provide.
* 5) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
*/
@Test(groups = TestNGGroup.HADOOP_S3_TO_S3)
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITS3InputToS3HadoopIndexTest extends AbstractS3InputHadoopIndexTest
{
@Test()
public void testS3IndexData() throws Exception
{
doTest();
}
}
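
The invocation for this variant has the same shape as the S3-to-HDFS example above; only the TestNG group and the override file change. Again a sketch with placeholder values, and with the Maven goal and profile assumed rather than shown in this change:

  mvn verify -P integration-tests \
    -Dgroups=hadoop-s3-to-s3-deep-storage \
    -Dstart.hadoop.docker=true \
    -Dextra.datasource.name.suffix='' \
    -Doverride.config.path=<PATH_TO_FILE> \
    -Ddruid.s3.accessKey=<your-access-key> \
    -Ddruid.s3.secretKey=<your-secret-key>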

@ -85,14 +85,14 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
private static final Logger LOG = new Logger(AbstractITBatchIndexTest.class);

@Inject
IntegrationTestingConfig config;
protected IntegrationTestingConfig config;
@Inject
protected SqlTestQueryHelper sqlQueryHelper;

@Inject
ClientInfoResourceTestClient clientInfoResourceTestClient;

void doIndexTest(
protected void doIndexTest(
String dataSource,
String indexTaskFilePath,
String queryFilePath,

@ -104,7 +104,7 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
doIndexTest(dataSource, indexTaskFilePath, Function.identity(), queryFilePath, waitForNewVersion, runTestQueries, waitForSegmentsToLoad);
}

void doIndexTest(
protected void doIndexTest(
String dataSource,
String indexTaskFilePath,
Function<String, String> taskSpecTransform,

@ -151,7 +151,7 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
}
}

void doReindexTest(
protected void doReindexTest(
String baseDataSource,
String reindexDataSource,
String reindexTaskFilePath,

@ -2,9 +2,9 @@
"type": "index_hadoop",
"spec": {
"dataSchema": {
"dataSource": "batchHadoop",
"dataSource": "%%DATASOURCE%%",
"parser": {
"type": "string",
"type": "hadoopyString",
"parseSpec": {
"format": "tsv",
"timestampSpec": {

@ -53,7 +53,7 @@
"type": "hadoop",
"inputSpec": {
"type": "static",
"paths": "%%HADOOP_TEST_PATH%%"
"paths": "/batch_index/tsv/batch_hadoop.data"
}
},
"tuningConfig": {

@ -64,7 +64,19 @@
"type": "hashed"
},
"jobProperties": {
"fs.permissions.umask-mode": "022"
"fs.permissions.umask-mode": "022",
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
"dfs.datanode.address" : "druid-it-hadoop",
"dfs.client.use.datanode.hostname" : "true",
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.map.memory.mb" : 1024,
"mapreduce.reduce.memory.mb" : 1024
},
"rowFlushBoundary": 10000
}

@ -0,0 +1,107 @@
{
"type": "index_hadoop",
"hadoopDependencyCoordinates" : ["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"],
"spec": {
"dataSchema": {
"dataSource": "%%DATASOURCE%%",
"parser": {
"type": "hadoopyString",
"parseSpec": {
"format" : "json",
"timestampSpec": {
"column": "timestamp"
},
"dimensionsSpec": {
"dimensions": [
"page",
{"type": "string", "name": "language", "createBitmapIndex": false},
"user",
"unpatrolled",
"newPage",
"robot",
"anonymous",
"namespace",
"continent",
"country",
"region",
"city"
]
}
}
},
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
},
{
"name": "thetaSketch",
"type": "thetaSketch",
"fieldName": "user"
},
{
"name": "quantilesDoublesSketch",
"type": "quantilesDoublesSketch",
"fieldName": "delta"
},
{
"name": "HLLSketchBuild",
"type": "HLLSketchBuild",
"fieldName": "user"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-02" ]
}
},
"ioConfig": {
"type": "hadoop",
"inputSpec": {
"type": "static",
"paths": "%%INPUT_PATHS%%"
}
},
"tuningConfig": {
"type": "hadoop",
"partitionsSpec": {
"assumeGrouped": true,
"targetPartitionSize": 75000,
"type": "hashed"
},
"jobProperties": {
"fs.permissions.umask-mode": "022",
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
"dfs.datanode.address" : "druid-it-hadoop",
"dfs.client.use.datanode.hostname" : "true",
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.map.memory.mb" : 1024,
"mapreduce.reduce.memory.mb" : 1024,
"fs.azure.account.key.%%AZURE_ACCOUNT%%.blob.core.windows.net":"%%AZURE_KEY%%"
},
"rowFlushBoundary": 10000
}
}
}

@ -0,0 +1,113 @@
{
"type": "index_hadoop",
"spec": {
"dataSchema": {
"dataSource": "%%DATASOURCE%%",
"parser": {
"type": "hadoopyString",
"parseSpec": {
"format" : "json",
"timestampSpec": {
"column": "timestamp"
},
"dimensionsSpec": {
"dimensions": [
"page",
{"type": "string", "name": "language", "createBitmapIndex": false},
"user",
"unpatrolled",
"newPage",
"robot",
"anonymous",
"namespace",
"continent",
"country",
"region",
"city"
]
}
}
},
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
},
{
"name": "thetaSketch",
"type": "thetaSketch",
"fieldName": "user"
},
{
"name": "quantilesDoublesSketch",
"type": "quantilesDoublesSketch",
"fieldName": "delta"
},
{
"name": "HLLSketchBuild",
"type": "HLLSketchBuild",
"fieldName": "user"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-02" ]
}
},
"ioConfig": {
"type": "hadoop",
"inputSpec": {
"type": "static",
"paths": "%%INPUT_PATHS%%"
}
},
"tuningConfig": {
"type": "hadoop",
"partitionsSpec": {
"assumeGrouped": true,
"targetPartitionSize": 75000,
"type": "hashed"
},
"jobProperties": {
"fs.permissions.umask-mode": "022",
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
"dfs.datanode.address" : "druid-it-hadoop",
"dfs.client.use.datanode.hostname" : "true",
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.map.memory.mb" : 1024,
"mapreduce.reduce.memory.mb" : 1024,
"fs.gs.auth.service.account.json.keyfile":"%%GCS_KEYFILE_PATH%%",
"fs.gs.working.dir":"/",
"fs.gs.path.encoding":"uri-path",
"fs.gs.impl": "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem",
"fs.AbstractFileSystem.gs.impl": "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS",
"fs.gs.auth.service.account.enable": "true",
"mapred.child.env":"GOOGLE_APPLICATION_CREDENTIALS=%%GCS_KEYFILE_PATH%%",
"fs.gs.reported.permissions":"777"
},
"rowFlushBoundary": 10000
}
}
}

@ -0,0 +1,101 @@
{
"type": "index_hadoop",
"spec": {
"dataSchema": {
"dataSource": "%%DATASOURCE%%",
"parser": {
"type": "hadoopyString",
"parseSpec": {
"format" : "json",
"timestampSpec": {
"column": "timestamp"
},
"dimensionsSpec": {
"dimensions": [
"page",
{"type": "string", "name": "language", "createBitmapIndex": false},
"user",
"unpatrolled",
"newPage",
"robot",
"anonymous",
"namespace",
"continent",
"country",
"region",
"city"
]
}
}
},
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
},
{
"name": "thetaSketch",
"type": "thetaSketch",
"fieldName": "user"
},
{
"name": "quantilesDoublesSketch",
"type": "quantilesDoublesSketch",
"fieldName": "delta"
},
{
"name": "HLLSketchBuild",
"type": "HLLSketchBuild",
"fieldName": "user"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-02" ]
}
},
"ioConfig": {
"type": "hadoop",
"inputSpec": {
"type": "static",
"paths": "%%INPUT_PATHS%%"
}
},
"tuningConfig": {
"type": "hadoop",
"partitionsSpec": %%PARTITIONS_SPEC%%,
"jobProperties": {
"fs.permissions.umask-mode": "022",
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
"dfs.datanode.address" : "druid-it-hadoop",
"dfs.client.use.datanode.hostname" : "true",
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.map.memory.mb" : 1024,
"mapreduce.reduce.memory.mb" : 1024
},
"rowFlushBoundary": 10000
}
}
}

@ -0,0 +1,77 @@
{
"type": "index_hadoop",
"spec": {
"dataSchema": {
"dataSource": "%%REINDEX_DATASOURCE%%",
"parser": {
"type": "hadoopyString",
"parseSpec": {
"format" : "json",
"timestampSpec": {
"column": "timestamp",
"format": "iso"
},
"dimensionsSpec": {
"dimensionExclusions" : ["robot", "continent"]
}
}
},
"metricsSpec": [
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-01" ]
}
},
"ioConfig": {
"type": "hadoop",
"inputSpec": {
"type": "dataSource",
"ingestionSpec": {
"dataSource": "%%DATASOURCE%%",
"intervals": ["2013-08-31/2013-09-01"]
}
}
},
"tuningConfig": {
"type": "hadoop",
"partitionsSpec": {
"assumeGrouped": true,
"targetPartitionSize": 75000,
"type": "hashed"
},
"jobProperties": {
"fs.permissions.umask-mode": "022",
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
"dfs.datanode.address" : "druid-it-hadoop",
"dfs.client.use.datanode.hostname" : "true",
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.map.memory.mb" : 1024,
"mapreduce.reduce.memory.mb" : 1024
},
"rowFlushBoundary": 10000
}
}
}

@ -0,0 +1,114 @@
{
"type": "index_hadoop",
"spec": {
"dataSchema": {
"dataSource": "%%DATASOURCE%%",
"parser": {
"type": "hadoopyString",
"parseSpec": {
"format" : "json",
"timestampSpec": {
"column": "timestamp"
},
"dimensionsSpec": {
"dimensions": [
"page",
{"type": "string", "name": "language", "createBitmapIndex": false},
"user",
"unpatrolled",
"newPage",
"robot",
"anonymous",
"namespace",
"continent",
"country",
"region",
"city"
]
}
}
},
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
},
{
"name": "thetaSketch",
"type": "thetaSketch",
"fieldName": "user"
},
{
"name": "quantilesDoublesSketch",
"type": "quantilesDoublesSketch",
"fieldName": "delta"
},
{
"name": "HLLSketchBuild",
"type": "HLLSketchBuild",
"fieldName": "user"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-02" ]
}
},
"ioConfig": {
"type": "hadoop",
"inputSpec": {
"type": "static",
"paths": "%%INPUT_PATHS%%"
}
},
"tuningConfig": {
"type": "hadoop",
"partitionsSpec": {
"assumeGrouped": true,
"targetPartitionSize": 75000,
"type": "hashed"
},
"jobProperties": {
"fs.permissions.umask-mode": "022",
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
"dfs.datanode.address" : "druid-it-hadoop",
"dfs.client.use.datanode.hostname" : "true",
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Daws.region=%%AWS_REGION%%",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Daws.region=%%AWS_REGION%%",
"mapreduce.map.memory.mb" : 1024,
"mapreduce.reduce.memory.mb" : 1024,
"fs.s3.awsAccessKeyId" : "%%AWS_ACCESS_KEY%%",
"fs.s3.awsSecretAccessKey" : "%%AWS_SECRET_KEY%%",
"fs.s3.impl" : "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
"fs.s3n.awsAccessKeyId" : "%%AWS_ACCESS_KEY%%",
"fs.s3n.awsSecretAccessKey" : "%%AWS_SECRET_KEY%%",
"fs.s3n.impl" : "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
"fs.s3a.access.key" : "%%AWS_ACCESS_KEY%%",
"fs.s3a.secret.key" : "%%AWS_SECRET_KEY%%",
"fs.s3a.impl" : "org.apache.hadoop.fs.s3a.S3AFileSystem"
},
"rowFlushBoundary": 10000
}
}
}

@ -28,7 +28,6 @@
<test name="AllSerializedTests">
<packages>
<package name="org.apache.druid.tests.*">
<exclude name="org.apache.druid.tests.hadoop"/>
<exclude name="org.apache.druid.tests.parallelized"/>
</package>
</packages>