From 61295bd00262d10a57f38a4c18add412dc85ceb5 Mon Sep 17 00:00:00 2001 From: Jonathan Wei Date: Thu, 30 Apr 2020 14:33:01 -0700 Subject: [PATCH] More Hadoop integration tests (#9714) * More Hadoop integration tests * Add missing s3 instructions * Address PR comments * Address PR comments * PR comments * Fix typo --- .travis.yml | 4 +- integration-tests/README.md | 26 +-- .../override-examples/hadoop/azure_to_azure | 31 ++++ .../override-examples/hadoop/azure_to_hdfs | 34 ++++ .../override-examples/hadoop/gcs_to_gcs | 30 ++++ .../override-examples/hadoop/gcs_to_hdfs | 30 ++++ .../override-examples/hadoop/s3_to_hdfs | 34 ++++ .../override-examples/hadoop/s3_to_s3 | 35 ++++ .../docker/environment-configs/router | 2 + .../generate-client-certs-and-keystores.sh | 2 - .../tls/generate-expired-client-cert.sh | 2 +- .../docker/tls/generate-good-client-cert.sh | 2 +- ...generate-incorrect-hostname-client-cert.sh | 2 +- ...nerate-invalid-intermediate-client-cert.sh | 4 +- .../tls/generate-to-be-revoked-client-cert.sh | 4 +- .../generate-untrusted-root-client-cert.sh | 2 +- ...generate-valid-intermediate-client-cert.sh | 4 +- integration-tests/pom.xml | 3 +- integration-tests/run_cluster.sh | 9 +- .../testing/ConfigFileConfigProvider.java | 24 +++ .../druid/testing/DockerConfigProvider.java | 34 +++- .../testing/IntegrationTestingConfig.java | 6 + .../org/apache/druid/tests/TestNGGroup.java | 11 +- .../AbstractAzureInputHadoopIndexTest.java | 83 +++++++++ .../AbstractGcsInputHadoopIndexTest.java | 79 ++++++++ .../AbstractS3InputHadoopIndexTest.java | 96 ++++++++++ .../ITAzureInputToAzureHadoopIndexTest.java | 50 ++++++ .../ITAzureInputToHdfsHadoopIndexTest.java | 50 ++++++ .../ITGcsInputToGcsHadoopIndexTest.java | 51 ++++++ .../ITGcsInputToHdfsHadoopIndexTest.java | 51 ++++++ .../druid/tests/hadoop/ITHadoopIndexTest.java | 169 ++++++++++++------ .../ITS3InputToHdfsHadoopIndexTest.java | 49 +++++ .../hadoop/ITS3InputToS3HadoopIndexTest.java | 49 +++++ .../indexer/AbstractITBatchIndexTest.java | 8 +- .../batch_index/tsv}/batch_hadoop.data | 0 .../hadoop/batch_hadoop_indexer.json | 20 ++- ...kipedia_hadoop_azure_input_index_task.json | 107 +++++++++++ ...wikipedia_hadoop_gcs_input_index_task.json | 113 ++++++++++++ .../hadoop/wikipedia_hadoop_index_task.json | 101 +++++++++++ .../hadoop/wikipedia_hadoop_reindex_task.json | 77 ++++++++ .../wikipedia_hadoop_s3_input_index_task.json | 114 ++++++++++++ .../src/test/resources/testng.xml | 1 - 42 files changed, 1501 insertions(+), 102 deletions(-) create mode 100644 integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure create mode 100644 integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs create mode 100644 integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_gcs create mode 100644 integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_hdfs create mode 100644 integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs create mode 100644 integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 create mode 100644 integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractAzureInputHadoopIndexTest.java create mode 100644 integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractGcsInputHadoopIndexTest.java create mode 100644 integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractS3InputHadoopIndexTest.java create mode 100644 
integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToAzureHadoopIndexTest.java create mode 100644 integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToHdfsHadoopIndexTest.java create mode 100644 integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToGcsHadoopIndexTest.java create mode 100644 integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToHdfsHadoopIndexTest.java create mode 100644 integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToHdfsHadoopIndexTest.java create mode 100644 integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToS3HadoopIndexTest.java rename integration-tests/src/test/resources/{hadoop => data/batch_index/tsv}/batch_hadoop.data (100%) create mode 100644 integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task.json create mode 100644 integration-tests/src/test/resources/hadoop/wikipedia_hadoop_gcs_input_index_task.json create mode 100644 integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json create mode 100644 integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json create mode 100644 integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task.json diff --git a/.travis.yml b/.travis.yml index 770c8fcc7ab..6a508919c72 100644 --- a/.travis.yml +++ b/.travis.yml @@ -373,7 +373,7 @@ jobs: name: "(Compile=openjdk8, Run=openjdk8) other integration test" jdk: openjdk8 services: *integration_test_services - env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format' JVM_RUNTIME='-Djvm.runtime=8' + env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=8' script: *run_integration_test after_failure: *integration_test_diags # END - Integration tests for Compile with Java 8 and Run with Java 8 @@ -407,7 +407,7 @@ jobs: - <<: *integration_tests name: "(Compile=openjdk8, Run=openjdk11) other integration test" jdk: openjdk8 - env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format' JVM_RUNTIME='-Djvm.runtime=11' + env: 
TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=11' # END - Integration tests for Compile with Java 8 and Run with Java 11 - name: "security vulnerabilities" diff --git a/integration-tests/README.md b/integration-tests/README.md index 897c8890a38..7fe1bc2b5da 100644 --- a/integration-tests/README.md +++ b/integration-tests/README.md @@ -214,31 +214,33 @@ of the integration test run discussed above. This is because druid test clusters might not, in general, have access to hadoop. This also applies to integration test that uses Hadoop HDFS as an inputSource or as a deep storage. To run integration test that uses Hadoop, you will have to run a Hadoop cluster. This can be done in two ways: -1) Run your own Druid + Hadoop cluster and specified Hadoop configs in the configuration file (CONFIG_FILE). -2) Run Druid Docker test clusters with Hadoop container by passing -Dstart.hadoop.docker=true to the mvn command. +1) Run Druid Docker test clusters with Hadoop container by passing -Dstart.hadoop.docker=true to the mvn command. +2) Run your own Druid + Hadoop cluster and specified Hadoop configs in the configuration file (CONFIG_FILE). Currently, hdfs-deep-storage and other -deep-storage integration test groups can only be run with Druid Docker test clusters by passing -Dstart.hadoop.docker=true to start Hadoop container. You will also have to provide -Doverride.config.path= with your Druid's Hadoop configs set. See integration-tests/docker/environment-configs/override-examples/hdfs directory for example. Note that if the integration test you are running also uses other cloud extension (S3, Azure, GCS), additional -credentials/configs may need to be set in the same file as your Druid's Hadoop configs set. +credentials/configs may need to be set in the same file as your Druid's Hadoop configs set. -Currently, ITHadoopIndexTest can only be run with your own Druid + Hadoop cluster by following the below steps: -Create a directory called batchHadoop1 in the hadoop file system -(anywhere you want) and put batch_hadoop.data (integration-tests/src/test/resources/hadoop/batch_hadoop.data) -into that directory (as its only file). +If you are running ITHadoopIndexTest with your own Druid + Hadoop cluster, please follow the below steps: +- Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json + located in integration-tests/src/test/resources/data/batch_index/json to your HDFS at /batch_index/json/ +- Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/tsv to your HDFS + at /batch_index/tsv/ +If using the Docker-based Hadoop container, the steps above are automatically done by the integration tests. -Add this keyword to the configuration file (see above): +When running the Hadoop tests, you must set `-Dextra.datasource.name.suffix=''`, due to https://github.com/apache/druid/issues/9788. 
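For reference, the file passed via -Doverride.config.path is an env-style properties file. A minimal sketch for the pure-HDFS case, pieced together from the cloud-to-HDFS examples added in this patch (the committed example under integration-tests/docker/environment-configs/override-examples/hdfs may set additional variables):

```
druid_storage_type=hdfs
druid_storage_storageDirectory=/druid/segments

druid_extensions_loadList=["druid-hdfs-storage"]

druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5"]
```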
+Run the test using mvn (using the bundled Docker-based Hadoop cluster): ``` - "hadoopTestDir": "" + mvn verify -P integration-tests -Dit.test=ITHadoopIndexTest -Dstart.hadoop.docker=true -Doverride.config.path=docker/environment-configs/override-examples/hdfs -Dextra.datasource.name.suffix='' ``` -Run the test using mvn: - +Run the test using mvn (using config file for existing Hadoop cluster): ``` - mvn verify -P int-tests-config-file -Dit.test=ITHadoopIndexTest + mvn verify -P int-tests-config-file -Dit.test=ITHadoopIndexTest -Dextra.datasource.name.suffix='' ``` In some test environments, the machine where the tests need to be executed diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure new file mode 100644 index 00000000000..6564b7d83c4 --- /dev/null +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# +# Example of override config file to provide. +# Please replace with your cloud configs/credentials +# +druid_storage_type=azure +druid_azure_account= +druid_azure_key= +druid_azure_container= + +druid_extensions_loadList=["druid-azure-extensions","druid-hdfs-storage"] + +druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"] diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs new file mode 100644 index 00000000000..26ce1343a59 --- /dev/null +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs @@ -0,0 +1,34 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# +# Example of override config file to provide. 
+# Please replace with your cloud configs/credentials +# +druid_storage_type=hdfs +druid_storage_storageDirectory=/druid/segments + +druid_extensions_loadList=["druid-azure-extensions","druid-hdfs-storage"] + +# Not used since we have HDFS deep storage, but the Druid Azure extension requires these to be defined +druid_azure_account= +druid_azure_key= +druid_azure_container= + +druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"] diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_gcs b/integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_gcs new file mode 100644 index 00000000000..40bf7aa3b54 --- /dev/null +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_gcs @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# +# Example of override config file to provide. +# Please replace and with your cloud configs/credentials +# +druid_storage_type=google +druid_google_bucket= +druid_google_prefix= + +druid_extensions_loadList=["druid-google-extensions","druid-hdfs-storage"] + +GOOGLE_APPLICATION_CREDENTIALS=/shared/docker/credentials/ diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_hdfs b/integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_hdfs new file mode 100644 index 00000000000..1930663ea1e --- /dev/null +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_hdfs @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# +# Example of override config file to provide. 
+# Please replace and with your cloud configs/credentials +# +druid_storage_type=hdfs +druid_storage_storageDirectory=/druid/segments + +druid_extensions_loadList=["druid-google-extensions","druid-hdfs-storage"] + +GOOGLE_APPLICATION_CREDENTIALS=/shared/docker/credentials/ + diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs new file mode 100644 index 00000000000..cd7097346d8 --- /dev/null +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs @@ -0,0 +1,34 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# +# Example of override config file to provide. +# Please replace with your cloud configs/credentials +# +druid_s3_accessKey= +druid_s3_secretKey= + +druid_storage_type=hdfs +druid_storage_storageDirectory=/druid/segments + +AWS_REGION= + +druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"] + +druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"] diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 new file mode 100644 index 00000000000..4ad6896b093 --- /dev/null +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# +# Example of override config file to provide. 
+# Please replace with your cloud configs/credentials +# +druid_s3_accessKey= +druid_s3_secretKey= + +druid_storage_type=s3 +druid_storage_bucket= +druid_storage_baseKey= + +AWS_REGION= + +druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"] + +druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"] diff --git a/integration-tests/docker/environment-configs/router b/integration-tests/docker/environment-configs/router index f25b23ee8cf..c3f4e23d966 100644 --- a/integration-tests/docker/environment-configs/router +++ b/integration-tests/docker/environment-configs/router @@ -27,3 +27,5 @@ SERVICE_DRUID_JAVA_OPTS=-server -Xmx128m -XX:+UseG1GC -agentlib:jdwp=transport=d druid_auth_basic_common_cacheDirectory=/tmp/authCache/router druid_sql_avatica_enable=true druid_server_https_crlPath=/tls/revocations.crl +druid_router_managementProxy_enabled=true + diff --git a/integration-tests/docker/tls/generate-client-certs-and-keystores.sh b/integration-tests/docker/tls/generate-client-certs-and-keystores.sh index 4bc8774877f..3d48db05d56 100755 --- a/integration-tests/docker/tls/generate-client-certs-and-keystores.sh +++ b/integration-tests/docker/tls/generate-client-certs-and-keystores.sh @@ -32,5 +32,3 @@ cd client_tls ../docker/tls/generate-to-be-revoked-client-cert.sh ../docker/tls/generate-untrusted-root-client-cert.sh ../docker/tls/generate-valid-intermediate-client-cert.sh - - diff --git a/integration-tests/docker/tls/generate-expired-client-cert.sh b/integration-tests/docker/tls/generate-expired-client-cert.sh index dd05847644a..9519dd34d01 100755 --- a/integration-tests/docker/tls/generate-expired-client-cert.sh +++ b/integration-tests/docker/tls/generate-expired-client-cert.sh @@ -118,7 +118,7 @@ rm -rf certs.seq echo 11111115 > certs.seq # Generate a client certificate for this machine -openssl genrsa -out expired_client.key 1024 -sha256 +openssl genrsa -out expired_client.key 1024 openssl req -new -out expired_client.csr -key expired_client.key -reqexts req_ext -config expired_csr.conf openssl ca -batch -config root_for_expired_client.cnf -policy policy_loose -out expired_client.pem -outdir . 
-startdate 101010000000Z -enddate 101011000000Z -extensions v3_ca -cert root.pem -keyfile root.key -infiles expired_client.csr diff --git a/integration-tests/docker/tls/generate-good-client-cert.sh b/integration-tests/docker/tls/generate-good-client-cert.sh index 0f16c1449c5..63d3175bf6e 100755 --- a/integration-tests/docker/tls/generate-good-client-cert.sh +++ b/integration-tests/docker/tls/generate-good-client-cert.sh @@ -50,7 +50,7 @@ DNS.2 = localhost EOT # Generate a client certificate for this machine -openssl genrsa -out client.key 1024 -sha256 +openssl genrsa -out client.key 1024 openssl req -new -out client.csr -key client.key -reqexts req_ext -config csr.conf openssl x509 -req -days 3650 -in client.csr -CA root.pem -CAkey root.key -set_serial 0x11111111 -out client.pem -sha256 -extfile csr.conf -extensions req_ext diff --git a/integration-tests/docker/tls/generate-incorrect-hostname-client-cert.sh b/integration-tests/docker/tls/generate-incorrect-hostname-client-cert.sh index 41a7a7d6bef..2d224dcea85 100755 --- a/integration-tests/docker/tls/generate-incorrect-hostname-client-cert.sh +++ b/integration-tests/docker/tls/generate-incorrect-hostname-client-cert.sh @@ -46,7 +46,7 @@ DNS.1 = thisisprobablywrongtoo EOT -openssl genrsa -out invalid_hostname_client.key 1024 -sha256 +openssl genrsa -out invalid_hostname_client.key 1024 openssl req -new -out invalid_hostname_client.csr -key invalid_hostname_client.key -reqexts req_ext -config invalid_hostname_csr.conf openssl x509 -req -days 3650 -in invalid_hostname_client.csr -CA root.pem -CAkey root.key -set_serial 0x11111112 -out invalid_hostname_client.pem -sha256 -extfile invalid_hostname_csr.conf -extensions req_ext diff --git a/integration-tests/docker/tls/generate-invalid-intermediate-client-cert.sh b/integration-tests/docker/tls/generate-invalid-intermediate-client-cert.sh index 4744e9f4a8f..2016b810c5e 100755 --- a/integration-tests/docker/tls/generate-invalid-intermediate-client-cert.sh +++ b/integration-tests/docker/tls/generate-invalid-intermediate-client-cert.sh @@ -45,7 +45,7 @@ IP.1 = 9.9.9.9 EOT # Generate a bad intermediate certificate -openssl genrsa -out invalid_ca_intermediate.key 1024 -sha256 +openssl genrsa -out invalid_ca_intermediate.key 1024 openssl req -new -out invalid_ca_intermediate.csr -key invalid_ca_intermediate.key -reqexts req_ext -config invalid_ca_intermediate.conf openssl x509 -req -days 3650 -in invalid_ca_intermediate.csr -CA root.pem -CAkey root.key -set_serial 0x33333331 -out invalid_ca_intermediate.pem -sha256 -extfile invalid_ca_intermediate.conf -extensions req_ext @@ -81,7 +81,7 @@ DNS.2 = localhost EOT # Generate a client certificate for this machine -openssl genrsa -out invalid_ca_client.key 1024 -sha256 +openssl genrsa -out invalid_ca_client.key 1024 openssl req -new -out invalid_ca_client.csr -key invalid_ca_client.key -reqexts req_ext -config invalid_ca_client.conf openssl x509 -req -days 3650 -in invalid_ca_client.csr -CA invalid_ca_intermediate.pem -CAkey invalid_ca_intermediate.key -set_serial 0x33333333 -out invalid_ca_client.pem -sha256 -extfile invalid_ca_client.conf -extensions req_ext diff --git a/integration-tests/docker/tls/generate-to-be-revoked-client-cert.sh b/integration-tests/docker/tls/generate-to-be-revoked-client-cert.sh index e1d9c6687cc..40588201cb8 100755 --- a/integration-tests/docker/tls/generate-to-be-revoked-client-cert.sh +++ b/integration-tests/docker/tls/generate-to-be-revoked-client-cert.sh @@ -52,9 +52,9 @@ DNS.2 = localhost EOT # Generate a client certificate 
for this machine -openssl genrsa -out revoked_client.key 1024 -sha256 +openssl genrsa -out revoked_client.key 1024 openssl req -new -out revoked_client.csr -key revoked_client.key -reqexts req_ext -config revoked_csr.conf -openssl x509 -req -days 3650 -in revoked_client.csr -CA root.pem -CAkey root.key -set_serial 0x11111113 -out revoked_client.pem -sha256 -extfile csr.conf -extensions req_ext +openssl x509 -req -days 3650 -in revoked_client.csr -CA root.pem -CAkey root.key -set_serial 0x11111113 -out revoked_client.pem -sha256 -extfile revoked_csr.conf -extensions req_ext # Create a Java keystore containing the generated certificate openssl pkcs12 -export -in revoked_client.pem -inkey revoked_client.key -out revoked_client.p12 -name revoked_druid -CAfile root.pem -caname druid-it-root -password pass:druid123 diff --git a/integration-tests/docker/tls/generate-untrusted-root-client-cert.sh b/integration-tests/docker/tls/generate-untrusted-root-client-cert.sh index b68c66f43be..3773209360d 100755 --- a/integration-tests/docker/tls/generate-untrusted-root-client-cert.sh +++ b/integration-tests/docker/tls/generate-untrusted-root-client-cert.sh @@ -50,7 +50,7 @@ DNS.2 = localhost EOT # Generate a client certificate for this machine -openssl genrsa -out client_another_root.key 1024 -sha256 +openssl genrsa -out client_another_root.key 1024 openssl req -new -out client_another_root.csr -key client_another_root.key -reqexts req_ext -config csr_another_root.conf openssl x509 -req -days 3650 -in client_another_root.csr -CA untrusted_root.pem -CAkey untrusted_root.key -set_serial 0x11111114 -out client_another_root.pem -sha256 -extfile csr_another_root.conf -extensions req_ext diff --git a/integration-tests/docker/tls/generate-valid-intermediate-client-cert.sh b/integration-tests/docker/tls/generate-valid-intermediate-client-cert.sh index 53e630db021..5d26f01f793 100755 --- a/integration-tests/docker/tls/generate-valid-intermediate-client-cert.sh +++ b/integration-tests/docker/tls/generate-valid-intermediate-client-cert.sh @@ -45,7 +45,7 @@ IP.1 = 9.9.9.9 EOT # Generate an intermediate certificate -openssl genrsa -out ca_intermediate.key 1024 -sha256 +openssl genrsa -out ca_intermediate.key 1024 openssl req -new -out ca_intermediate.csr -key ca_intermediate.key -reqexts req_ext -config ca_intermediate.conf openssl x509 -req -days 3650 -in ca_intermediate.csr -CA root.pem -CAkey root.key -set_serial 0x33333332 -out ca_intermediate.pem -sha256 -extfile ca_intermediate.conf -extensions req_ext @@ -81,7 +81,7 @@ DNS.2 = localhost EOT # Generate a client certificate for this machine -openssl genrsa -out intermediate_ca_client.key 1024 -sha256 +openssl genrsa -out intermediate_ca_client.key 1024 openssl req -new -out intermediate_ca_client.csr -key intermediate_ca_client.key -reqexts req_ext -config intermediate_ca_client.conf openssl x509 -req -days 3650 -in intermediate_ca_client.csr -CA ca_intermediate.pem -CAkey ca_intermediate.key -set_serial 0x33333333 -out intermediate_ca_client.pem -sha256 -extfile intermediate_ca_client.conf -extensions req_ext diff --git a/integration-tests/pom.xml b/integration-tests/pom.xml index e7db3a20f6c..9d10680dc95 100644 --- a/integration-tests/pom.xml +++ b/integration-tests/pom.xml @@ -365,6 +365,7 @@ false + \ Россия\ 한국\ 中国!? @@ -430,7 +431,7 @@ -Dfile.encoding=UTF-8 -Ddruid.test.config.dockerIp=${env.DOCKER_IP} -Ddruid.test.config.hadoopDir=${env.HADOOP_DIR} - -Ddruid.test.config.extraDatasourceNameSuffix=\ Россия\ 한국\ 中国!? 
+ -Ddruid.test.config.extraDatasourceNameSuffix=${extra.datasource.name.suffix} -Ddruid.zk.service.host=${env.DOCKER_IP} -Ddruid.client.https.trustStorePath=client_tls/truststore.jks -Ddruid.client.https.trustStorePassword=druid123 diff --git a/integration-tests/run_cluster.sh b/integration-tests/run_cluster.sh index ba07e36d8f7..faaa4eaa1ee 100755 --- a/integration-tests/run_cluster.sh +++ b/integration-tests/run_cluster.sh @@ -80,17 +80,18 @@ if [ -n "$DRUID_INTEGRATION_TEST_SKIP_START_DOCKER" ] && [ "$DRUID_INTEGRATION_T # For druid-kinesis-indexing-service mkdir -p $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service mv $SHARED_DIR/docker/lib/druid-kinesis-indexing-service-* $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service - $ For druid-parquet-extensions + # For druid-parquet-extensions mkdir -p $SHARED_DIR/docker/extensions/druid-parquet-extensions mv $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions - $ For druid-orc-extensions + # For druid-orc-extensions mkdir -p $SHARED_DIR/docker/extensions/druid-orc-extensions mv $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions # Pull Hadoop dependency if needed if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ] then - java -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client:2.8.5 -h org.apache.hadoop:hadoop-aws:2.8.5 + java -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client:2.8.5 -h org.apache.hadoop:hadoop-aws:2.8.5 -h org.apache.hadoop:hadoop-azure:2.8.5 + curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-latest.jar --output $SHARED_DIR/docker/lib/gcs-connector-hadoop2-latest.jar fi # install logging config @@ -243,4 +244,4 @@ fi # Start Router with custom TLS cert checkers docker run -d --privileged --net druid-it-net --ip 172.172.172.12 ${COMMON_ENV} ${ROUTER_CUSTOM_CHECK_TLS_ENV} ${OVERRIDE_ENV} --hostname druid-router-custom-check-tls --name druid-router-custom-check-tls -p 5003:5003 -p 8891:8891 -p 9091:9091 -v $SHARED_DIR:/shared -v $SERVICE_SUPERVISORDS_DIR/druid.conf:$SUPERVISORDIR/druid.conf --link druid-zookeeper-kafka:druid-zookeeper-kafka --link druid-coordinator:druid-coordinator --link druid-broker:druid-broker druid/cluster -} \ No newline at end of file + } \ No newline at end of file diff --git a/integration-tests/src/main/java/org/apache/druid/testing/ConfigFileConfigProvider.java b/integration-tests/src/main/java/org/apache/druid/testing/ConfigFileConfigProvider.java index 1fec42c0e44..320ef315b01 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/ConfigFileConfigProvider.java +++ b/integration-tests/src/main/java/org/apache/druid/testing/ConfigFileConfigProvider.java @@ -57,6 +57,9 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide private String password; private String cloudBucket; private String cloudPath; + private String cloudRegion; + private String hadoopGcsCredentialsPath; + private String azureKey; private String streamEndpoint; @JsonCreator @@ -193,6 +196,9 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide cloudBucket = props.get("cloud_bucket"); cloudPath = 
props.get("cloud_path"); + cloudRegion = props.get("cloud_region"); + hadoopGcsCredentialsPath = props.get("hadoopGcsCredentialsPath"); + azureKey = props.get("azureKey"); streamEndpoint = props.get("stream_endpoint"); LOG.info("router: [%s], [%s]", routerUrl, routerTLSUrl); @@ -356,6 +362,24 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide return cloudPath; } + @Override + public String getCloudRegion() + { + return cloudRegion; + } + + @Override + public String getAzureKey() + { + return azureKey; + } + + @Override + public String getHadoopGcsCredentialsPath() + { + return hadoopGcsCredentialsPath; + } + @Override public String getStreamEndpoint() { diff --git a/integration-tests/src/main/java/org/apache/druid/testing/DockerConfigProvider.java b/integration-tests/src/main/java/org/apache/druid/testing/DockerConfigProvider.java index e33e12188d5..11c540fb69d 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/DockerConfigProvider.java +++ b/integration-tests/src/main/java/org/apache/druid/testing/DockerConfigProvider.java @@ -33,10 +33,6 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider @NotNull private String dockerIp; - @JsonProperty - @NotNull - private String hadoopDir; - @JsonProperty private String extraDatasourceNameSuffix = ""; @@ -46,6 +42,15 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider @JsonProperty private String cloudBucket; + @JsonProperty + private String cloudRegion; + + @JsonProperty + private String hadoopGcsCredentialsPath; + + @JsonProperty + private String azureKey; + @JsonProperty private String streamEndpoint; @@ -185,9 +190,6 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider @Override public String getProperty(String prop) { - if ("hadoopTestDir".equals(prop)) { - return hadoopDir; - } throw new UnsupportedOperationException("DockerConfigProvider does not support property " + prop); } @@ -233,6 +235,24 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider return cloudPath; } + @Override + public String getCloudRegion() + { + return cloudRegion; + } + + @Override + public String getAzureKey() + { + return azureKey; + } + + @Override + public String getHadoopGcsCredentialsPath() + { + return hadoopGcsCredentialsPath; + } + @Override public String getStreamEndpoint() { diff --git a/integration-tests/src/main/java/org/apache/druid/testing/IntegrationTestingConfig.java b/integration-tests/src/main/java/org/apache/druid/testing/IntegrationTestingConfig.java index 17f2aab844a..32fc8c2a87e 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/IntegrationTestingConfig.java +++ b/integration-tests/src/main/java/org/apache/druid/testing/IntegrationTestingConfig.java @@ -89,5 +89,11 @@ public interface IntegrationTestingConfig String getCloudPath(); + String getCloudRegion(); + + String getAzureKey(); + + String getHadoopGcsCredentialsPath(); + String getStreamEndpoint(); } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java index c4c68644fa8..58a3a1b07a1 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java @@ -27,8 +27,6 @@ public class TestNGGroup { public static final String BATCH_INDEX = "batch-index"; - public static final String HADOOP_INDEX = "hadoop-index"; - public 
static final String KAFKA_INDEX = "kafka-index"; public static final String KAFKA_INDEX_SLOW = "kafka-index-slow"; @@ -86,6 +84,15 @@ public class TestNGGroup */ public static final String HDFS_DEEP_STORAGE = "hdfs-deep-storage"; + public static final String HADOOP_S3_TO_S3 = "hadoop-s3-to-s3-deep-storage"; + public static final String HADOOP_S3_TO_HDFS = "hadoop-s3-to-hdfs-deep-storage"; + + public static final String HADOOP_AZURE_TO_AZURE = "hadoop-azure-to-azure-deep-storage"; + public static final String HADOOP_AZURE_TO_HDFS = "hadoop-azure-to-hdfs-deep-storage"; + + public static final String HADOOP_GCS_TO_GCS = "hadoop-gcs-to-gcs-deep-storage"; + public static final String HADOOP_GCS_TO_HDFS = "hadoop-gcs-to-hdfs-deep-storage"; + /** * This group is not part of CI. To run this group, s3 configs/credentials for your s3 must be provided in a file. * The path of the file must then be pass to mvn with -Doverride.config.path= diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractAzureInputHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractAzureInputHadoopIndexTest.java new file mode 100644 index 00000000000..2a6894ef91d --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractAzureInputHadoopIndexTest.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.tests.hadoop; + +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.tests.indexer.AbstractITBatchIndexTest; + +import java.io.Closeable; +import java.util.UUID; +import java.util.function.Function; + +public abstract class AbstractAzureInputHadoopIndexTest extends AbstractITBatchIndexTest +{ + private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_azure_input_index_task.json"; + private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; + + void doTest() throws Exception + { + final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID(); + try ( + final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); + ) { + final Function azurePropsTransform = spec -> { + try { + String path = StringUtils.format( + "wasbs://%s@%s.blob.core.windows.net/", + config.getCloudPath(), + config.getCloudBucket() + ); + spec = StringUtils.replace( + spec, + "%%INPUT_PATHS%%", + path + ); + + spec = StringUtils.replace( + spec, + "%%AZURE_ACCOUNT%%", + config.getCloudBucket() + ); + + spec = StringUtils.replace( + spec, + "%%AZURE_KEY%%", + config.getAzureKey() + ); + + return spec; + } + catch (Exception e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + indexDatasource, + INDEX_TASK, + azurePropsTransform, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + } + } +} diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractGcsInputHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractGcsInputHadoopIndexTest.java new file mode 100644 index 00000000000..244a2f5438d --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractGcsInputHadoopIndexTest.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.tests.hadoop; + +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.tests.indexer.AbstractITBatchIndexTest; + +import java.io.Closeable; +import java.util.UUID; +import java.util.function.Function; + +public abstract class AbstractGcsInputHadoopIndexTest extends AbstractITBatchIndexTest +{ + private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_gcs_input_index_task.json"; + private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; + + void doTest() throws Exception + { + final String indexDatasource = "wikipedia_hadoop_index_test_" + UUID.randomUUID(); + try ( + final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); + ) { + final Function gcsPropsTransform = spec -> { + try { + + String path = StringUtils.format( + "gs://%s/%s", + config.getCloudBucket(), + config.getCloudPath() + ); + + spec = StringUtils.replace( + spec, + "%%INPUT_PATHS%%", + path + ); + + spec = StringUtils.replace( + spec, + "%%GCS_KEYFILE_PATH%%", + config.getHadoopGcsCredentialsPath() + ); + + return spec; + } + catch (Exception e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + indexDatasource, + INDEX_TASK, + gcsPropsTransform, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + } + } +} diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractS3InputHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractS3InputHadoopIndexTest.java new file mode 100644 index 00000000000..86dcdfaacff --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/AbstractS3InputHadoopIndexTest.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.tests.hadoop; + +import com.google.inject.Inject; +import org.apache.druid.common.aws.AWSCredentialsConfig; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.tests.indexer.AbstractITBatchIndexTest; + +import java.io.Closeable; +import java.util.UUID; +import java.util.function.Function; + +public abstract class AbstractS3InputHadoopIndexTest extends AbstractITBatchIndexTest +{ + @Inject + protected AWSCredentialsConfig awsCredentialsConfig; + + private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_s3_input_index_task.json"; + private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; + + void doTest() throws Exception + { + final String indexDatasource = "wikipedia_hadoop_index_test_" + UUID.randomUUID(); + try ( + final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); + ) { + final Function s3PropsTransform = spec -> { + try { + + String path = StringUtils.format( + "s3a://%s/%s", + config.getCloudBucket(), + config.getCloudPath() + ); + + spec = StringUtils.replace( + spec, + "%%INPUT_PATHS%%", + path + ); + + spec = StringUtils.replace( + spec, + "%%AWS_ACCESS_KEY%%", + awsCredentialsConfig.getAccessKey().getPassword() + ); + + spec = StringUtils.replace( + spec, + "%%AWS_SECRET_KEY%%", + awsCredentialsConfig.getSecretKey().getPassword() + ); + + spec = StringUtils.replace( + spec, + "%%AWS_REGION%%", + config.getCloudRegion() + ); + + return spec; + } + catch (Exception e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + indexDatasource, + INDEX_TASK, + s3PropsTransform, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + } + } +} diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToAzureHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToAzureHadoopIndexTest.java new file mode 100644 index 00000000000..47a5d9f8a85 --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToAzureHadoopIndexTest.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.tests.hadoop; + +import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.tests.TestNGGroup; +import org.testng.annotations.Guice; +import org.testng.annotations.Test; + +/** + * IMPORTANT: + * To run this test, you must: + * 1) Set the account, container, and key for your data. + * This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath, + * and -Ddruid.test.config.azureKey. 
+ * - `cloudBucket` should be set to your Azure account name + * - `cloudPath` should be set to the Azure container where the input data resides + * - `azureKey` should be set to an Azure access key that can read from the container above + * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json + * located in integration-tests/src/test/resources/data/batch_index/json to the Azure container set in step 1. + * 3) Provide -Doverride.config.path= with Azure+Hadoop configs set. See + * integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure for env vars to provide. + * 4) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command + */ +@Test(groups = TestNGGroup.HADOOP_AZURE_TO_AZURE) +@Guice(moduleFactory = DruidTestModuleFactory.class) +public class ITAzureInputToAzureHadoopIndexTest extends AbstractAzureInputHadoopIndexTest +{ + public void testGcsIndexData() throws Exception + { + doTest(); + } +} diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToHdfsHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToHdfsHadoopIndexTest.java new file mode 100644 index 00000000000..915c14c408f --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITAzureInputToHdfsHadoopIndexTest.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.tests.hadoop; + +import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.tests.TestNGGroup; +import org.testng.annotations.Guice; +import org.testng.annotations.Test; + +/** + * IMPORTANT: + * To run this test, you must: + * 1) Set the account, container, and key for your data. + * This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath, + * and -Ddruid.test.config.azureKey. + * - `cloudBucket` should be set to your Azure account name + * - `cloudPath` should be set to the Azure container where the input data resides + * - `azureKey` should be set to an Azure access key that can read from the container above + * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json + * located in integration-tests/src/test/resources/data/batch_index/json to the Azure container set in step 1. + * 3) Provide -Doverride.config.path= with Azure+Hadoop configs set. See + * integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs for env vars to provide. 
+ * 4) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command + */ +@Test(groups = TestNGGroup.HADOOP_AZURE_TO_HDFS) +@Guice(moduleFactory = DruidTestModuleFactory.class) +public class ITAzureInputToHdfsHadoopIndexTest extends AbstractAzureInputHadoopIndexTest +{ + public void testGcsIndexData() throws Exception + { + doTest(); + } +} diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToGcsHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToGcsHadoopIndexTest.java new file mode 100644 index 00000000000..5b0e4ec9557 --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToGcsHadoopIndexTest.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.tests.hadoop; + +import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.tests.TestNGGroup; +import org.testng.annotations.Guice; +import org.testng.annotations.Test; + +/** + * IMPORTANT: + * To run this test, you must: + * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and + * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file. + * 2) Set -Ddruid.test.config.hadoopGcsCredentialsPath to the location of your Google credentials file as it + * exists within the Hadoop cluster that will ingest the data. The credentials file can be placed in the + * shared folder used by the integration test containers if running the Docker-based Hadoop container, + * in which case this property can be set to /shared/ + * 3) Provide -Dresource.file.dir.path= with folder that contains GOOGLE_APPLICATION_CREDENTIALS file + * 4) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json + * located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1. + * 5) Provide -Doverride.config.path= with gcs configs set. See + * integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_gcs for env vars to provide. 
+ * 6) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command + */ +@Test(groups = TestNGGroup.HADOOP_GCS_TO_GCS) +@Guice(moduleFactory = DruidTestModuleFactory.class) +public class ITGcsInputToGcsHadoopIndexTest extends AbstractGcsInputHadoopIndexTest +{ + public void testGcsIndexData() throws Exception + { + doTest(); + } +} diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToHdfsHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToHdfsHadoopIndexTest.java new file mode 100644 index 00000000000..314d5d7164d --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITGcsInputToHdfsHadoopIndexTest.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.tests.hadoop; + +import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.tests.TestNGGroup; +import org.testng.annotations.Guice; +import org.testng.annotations.Test; + +/** + * IMPORTANT: + * To run this test, you must: + * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and + * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file. + * 2. Set -Ddruid.test.config.hadoopGcsCredentialsPath to the location of your Google credentials file as it + * exists within the Hadoop cluster that will ingest the data. The credentials file can be placed in the + * shared folder used by the integration test containers if running the Docker-based Hadoop container, + * in which case this property can be set to /shared/ + * 3) Provide -Dresource.file.dir.path= with folder that contains GOOGLE_APPLICATION_CREDENTIALS file + * 4) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json + * located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1. + * 5) Provide -Doverride.config.path= with gcs configs set. See + * integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_hdfs for env vars to provide. 
+ * 6) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command + */ +@Test(groups = TestNGGroup.HADOOP_GCS_TO_HDFS) +@Guice(moduleFactory = DruidTestModuleFactory.class) +public class ITGcsInputToHdfsHadoopIndexTest extends AbstractGcsInputHadoopIndexTest +{ + public void testGcsIndexData() throws Exception + { + doTest(); + } +} diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java index 38fbaf19ad8..496729ad45d 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java @@ -19,87 +19,150 @@ package org.apache.druid.tests.hadoop; -import com.google.inject.Inject; +import com.google.common.collect.ImmutableList; +import org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec; +import org.apache.druid.indexer.partitions.HashedPartitionsSpec; +import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.testing.IntegrationTestingConfig; import org.apache.druid.testing.guice.DruidTestModuleFactory; -import org.apache.druid.testing.utils.ITRetryUtil; import org.apache.druid.tests.TestNGGroup; -import org.apache.druid.tests.indexer.AbstractIndexerTest; -import org.testng.annotations.AfterClass; -import org.testng.annotations.BeforeClass; +import org.apache.druid.tests.indexer.AbstractITBatchIndexTest; +import org.testng.annotations.DataProvider; import org.testng.annotations.Guice; import org.testng.annotations.Test; -@Test(groups = TestNGGroup.HADOOP_INDEX) +import java.io.Closeable; +import java.util.UUID; +import java.util.function.Function; + +/** + * IMPORTANT: + * To run this test, you must: + * 1) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json + * located in integration-tests/src/test/resources/data/batch_index/json to your HDFS at /batch_index/json/ + * If using the Docker-based Hadoop container, this is automatically done by the integration tests. + * 2) Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/tsv to your HDFS + * at /batch_index/tsv/ + * If using the Docker-based Hadoop container, this is automatically done by the integration tests. + * 2) Provide -Doverride.config.path= with HDFS configs set. See + * integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide. 
+ * 3) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command + */ +@Test(groups = TestNGGroup.HDFS_DEEP_STORAGE) @Guice(moduleFactory = DruidTestModuleFactory.class) -public class ITHadoopIndexTest extends AbstractIndexerTest +public class ITHadoopIndexTest extends AbstractITBatchIndexTest { private static final Logger LOG = new Logger(ITHadoopIndexTest.class); + private static final String BATCH_TASK = "/hadoop/batch_hadoop_indexer.json"; private static final String BATCH_QUERIES_RESOURCE = "/hadoop/batch_hadoop_queries.json"; private static final String BATCH_DATASOURCE = "batchHadoop"; - private boolean dataLoaded = false; - @Inject - private IntegrationTestingConfig config; + private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_index_task.json"; + private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json"; + private static final String INDEX_DATASOURCE = "wikipedia_hadoop_index_test"; - @BeforeClass - public void beforeClass() + private static final String REINDEX_TASK = "/hadoop/wikipedia_hadoop_reindex_task.json"; + private static final String REINDEX_QUERIES_RESOURCE = "/indexer/wikipedia_reindex_queries.json"; + private static final String REINDEX_DATASOURCE = "wikipedia_hadoop_reindex_test"; + + @DataProvider + public static Object[][] resources() { - loadData(config.getProperty("hadoopTestDir") + "/batchHadoop1"); - dataLoaded = true; + return new Object[][]{ + {new HashedPartitionsSpec(3, null, null)}, + {new HashedPartitionsSpec(null, 3, ImmutableList.of("page"))}, + {new HashedPartitionsSpec(null, 3, ImmutableList.of("page", "user"))}, + {new SingleDimensionPartitionsSpec(1000, null, null, false)}, + {new SingleDimensionPartitionsSpec(1000, null, "page", false)}, + {new SingleDimensionPartitionsSpec(1000, null, null, true)}, + + //{new HashedPartitionsSpec(null, 3, null)} // this results in a bug where the segments have 0 rows + }; } @Test - public void testHadoopIndex() throws Exception + public void testLegacyITHadoopIndexTest() throws Exception { - queryHelper.testQueriesFromFile(BATCH_QUERIES_RESOURCE, 2); - } + try ( + final Closeable ignored0 = unloader(BATCH_DATASOURCE + config.getExtraDatasourceNameSuffix()); + ) { + final Function specPathsTransform = spec -> { + try { + String path = "/batch_index/tsv"; + spec = StringUtils.replace( + spec, + "%%INPUT_PATHS%%", + path + ); - private void loadData(String hadoopDir) - { - String indexerSpec; + return spec; + } + catch (Exception e) { + throw new RuntimeException(e); + } + }; - try { - LOG.info("indexerFile name: [%s]", BATCH_TASK); - indexerSpec = getResourceAsString(BATCH_TASK); - indexerSpec = StringUtils.replace(indexerSpec, "%%HADOOP_TEST_PATH%%", hadoopDir); - } - catch (Exception e) { - LOG.error("could not read and modify indexer file: %s", e.getMessage()); - throw new RuntimeException(e); - } - - try { - final String taskID = indexer.submitTask(indexerSpec); - LOG.info("TaskID for loading index task %s", taskID); - indexer.waitUntilTaskCompletes(taskID, 10000, 120); - ITRetryUtil.retryUntil( - () -> coordinator.areSegmentsLoaded(BATCH_DATASOURCE), + doIndexTest( + BATCH_DATASOURCE, + BATCH_TASK, + specPathsTransform, + BATCH_QUERIES_RESOURCE, + false, true, - 20000, - 10, - "Segment-Load-Task-" + taskID + true ); } - catch (Exception e) { - LOG.error("data could not be loaded: %s", e.getMessage()); - throw new RuntimeException(e); - } } - @AfterClass - public void afterClass() + @Test(dataProvider = "resources") + public 
void testIndexData(DimensionBasedPartitionsSpec partitionsSpec) throws Exception { - if (dataLoaded) { - try { - unloadAndKillData(BATCH_DATASOURCE); - } - catch (Exception e) { - LOG.warn(e, "exception while removing segments"); - } + String indexDatasource = INDEX_DATASOURCE + "_" + UUID.randomUUID(); + String reindexDatasource = REINDEX_DATASOURCE + "_" + UUID.randomUUID(); + + try ( + final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); + final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix()); + ) { + final Function specPathsTransform = spec -> { + try { + String path = "/batch_index/json"; + spec = StringUtils.replace( + spec, + "%%INPUT_PATHS%%", + path + ); + spec = StringUtils.replace( + spec, + "%%PARTITIONS_SPEC%%", + jsonMapper.writeValueAsString(partitionsSpec) + ); + + return spec; + } + catch (Exception e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + indexDatasource, + INDEX_TASK, + specPathsTransform, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + + doReindexTest( + indexDatasource, + reindexDatasource, + REINDEX_TASK, + REINDEX_QUERIES_RESOURCE + ); } } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToHdfsHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToHdfsHadoopIndexTest.java new file mode 100644 index 00000000000..01aa8e006d1 --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToHdfsHadoopIndexTest.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.tests.hadoop; + +import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.tests.TestNGGroup; +import org.testng.annotations.Guice; +import org.testng.annotations.Test; + +/** + * IMPORTANT: + * To run this test, you must: + * 1) Set the bucket, path, and region for your data. + * This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath + * and -Ddruid.test.config.cloudRegion or setting "cloud_bucket","cloud_path", and "cloud_region" in the config file. + * 2) Set -Ddruid.s3.accessKey and -Ddruid.s3.secretKey when running the tests to your access/secret keys. + * 3) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json + * located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1. + * 4) Provide -Doverride.config.path= with s3 credentials and hdfs deep storage configs set. See + * integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs for env vars to provide. 
+ * 5) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command + */ +@Test(groups = TestNGGroup.HADOOP_S3_TO_HDFS) +@Guice(moduleFactory = DruidTestModuleFactory.class) +public class ITS3InputToHdfsHadoopIndexTest extends AbstractS3InputHadoopIndexTest +{ + @Test() + public void testS3IndexData() throws Exception + { + doTest(); + } +} diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToS3HadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToS3HadoopIndexTest.java new file mode 100644 index 00000000000..27cedcdc75d --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITS3InputToS3HadoopIndexTest.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.tests.hadoop; + +import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.tests.TestNGGroup; +import org.testng.annotations.Guice; +import org.testng.annotations.Test; + +/** + * IMPORTANT: + * To run this test, you must: + * 1) Set the bucket, path, and region for your data. + * This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath + * and -Ddruid.test.config.cloudRegion or setting "cloud_bucket","cloud_path", and "cloud_region" in the config file. + * 2) Set -Ddruid.s3.accessKey and -Ddruid.s3.secretKey when running the tests to your access/secret keys. + * 3) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json + * located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1. + * 4) Provide -Doverride.config.path= with s3 credentials and s3 deep storage configs set. See + * integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 for env vars to provide.
+ * 5) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command + */ +@Test(groups = TestNGGroup.HADOOP_S3_TO_S3) +@Guice(moduleFactory = DruidTestModuleFactory.class) +public class ITS3InputToS3HadoopIndexTest extends AbstractS3InputHadoopIndexTest +{ + @Test() + public void testS3IndexData() throws Exception + { + doTest(); + } +} diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java index beaeac5330f..7309e7c8641 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java @@ -85,14 +85,14 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest private static final Logger LOG = new Logger(AbstractITBatchIndexTest.class); @Inject - IntegrationTestingConfig config; + protected IntegrationTestingConfig config; @Inject protected SqlTestQueryHelper sqlQueryHelper; @Inject ClientInfoResourceTestClient clientInfoResourceTestClient; - void doIndexTest( + protected void doIndexTest( String dataSource, String indexTaskFilePath, String queryFilePath, @@ -104,7 +104,7 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest doIndexTest(dataSource, indexTaskFilePath, Function.identity(), queryFilePath, waitForNewVersion, runTestQueries, waitForSegmentsToLoad); } - void doIndexTest( + protected void doIndexTest( String dataSource, String indexTaskFilePath, Function taskSpecTransform, @@ -151,7 +151,7 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest } } - void doReindexTest( + protected void doReindexTest( String baseDataSource, String reindexDataSource, String reindexTaskFilePath, diff --git a/integration-tests/src/test/resources/hadoop/batch_hadoop.data b/integration-tests/src/test/resources/data/batch_index/tsv/batch_hadoop.data similarity index 100% rename from integration-tests/src/test/resources/hadoop/batch_hadoop.data rename to integration-tests/src/test/resources/data/batch_index/tsv/batch_hadoop.data diff --git a/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json b/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json index adcc60b4256..465a2239379 100644 --- a/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json +++ b/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json @@ -2,9 +2,9 @@ "type": "index_hadoop", "spec": { "dataSchema": { - "dataSource": "batchHadoop", + "dataSource": "%%DATASOURCE%%", "parser": { - "type": "string", + "type": "hadoopyString", "parseSpec": { "format": "tsv", "timestampSpec": { @@ -53,7 +53,7 @@ "type": "hadoop", "inputSpec": { "type": "static", - "paths": "%%HADOOP_TEST_PATH%%" + "paths": "/batch_index/tsv/batch_hadoop.data" } }, "tuningConfig": { @@ -64,7 +64,19 @@ "type": "hashed" }, "jobProperties": { - "fs.permissions.umask-mode": "022" + "fs.permissions.umask-mode": "022", + "fs.default.name" : "hdfs://druid-it-hadoop:9000", + "fs.defaultFS" : "hdfs://druid-it-hadoop:9000", + "dfs.datanode.address" : "druid-it-hadoop", + "dfs.client.use.datanode.hostname" : "true", + "dfs.datanode.use.datanode.hostname" : "true", + "yarn.resourcemanager.hostname" : "druid-it-hadoop", + "yarn.nodemanager.vmem-check-enabled" : "false", + "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", + 
"mapreduce.job.user.classpath.first" : "true", + "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", + "mapreduce.map.memory.mb" : 1024, + "mapreduce.reduce.memory.mb" : 1024 }, "rowFlushBoundary": 10000 } diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task.json new file mode 100644 index 00000000000..60bd4954271 --- /dev/null +++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task.json @@ -0,0 +1,107 @@ +{ + "type": "index_hadoop", + "hadoopDependencyCoordinates" : ["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"], + "spec": { + "dataSchema": { + "dataSource": "%%DATASOURCE%%", + "parser": { + "type": "hadoopyString", + "parseSpec": { + "format" : "json", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + } + } + }, + "metricsSpec": [ + { + "type": "count", + "name": "count" + }, + { + "type": "doubleSum", + "name": "added", + "fieldName": "added" + }, + { + "type": "doubleSum", + "name": "deleted", + "fieldName": "deleted" + }, + { + "type": "doubleSum", + "name": "delta", + "fieldName": "delta" + }, + { + "name": "thetaSketch", + "type": "thetaSketch", + "fieldName": "user" + }, + { + "name": "quantilesDoublesSketch", + "type": "quantilesDoublesSketch", + "fieldName": "delta" + }, + { + "name": "HLLSketchBuild", + "type": "HLLSketchBuild", + "fieldName": "user" + } + ], + "granularitySpec": { + "segmentGranularity": "DAY", + "queryGranularity": "second", + "intervals" : [ "2013-08-31/2013-09-02" ] + } + }, + "ioConfig": { + "type": "hadoop", + "inputSpec": { + "type": "static", + "paths": "%%INPUT_PATHS%%" + } + }, + "tuningConfig": { + "type": "hadoop", + "partitionsSpec": { + "assumeGrouped": true, + "targetPartitionSize": 75000, + "type": "hashed" + }, + "jobProperties": { + "fs.permissions.umask-mode": "022", + "fs.default.name" : "hdfs://druid-it-hadoop:9000", + "fs.defaultFS" : "hdfs://druid-it-hadoop:9000", + "dfs.datanode.address" : "druid-it-hadoop", + "dfs.client.use.datanode.hostname" : "true", + "dfs.datanode.use.datanode.hostname" : "true", + "yarn.resourcemanager.hostname" : "druid-it-hadoop", + "yarn.nodemanager.vmem-check-enabled" : "false", + "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -D", + "mapreduce.job.user.classpath.first" : "true", + "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", + "mapreduce.map.memory.mb" : 1024, + "mapreduce.reduce.memory.mb" : 1024, + "fs.azure.account.key.%%AZURE_ACCOUNT%%.blob.core.windows.net":"%%AZURE_KEY%%" + }, + "rowFlushBoundary": 10000 + } + } +} \ No newline at end of file diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_gcs_input_index_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_gcs_input_index_task.json new file mode 100644 index 00000000000..03a1292105a --- /dev/null +++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_gcs_input_index_task.json @@ -0,0 +1,113 @@ +{ + "type": "index_hadoop", + "spec": { + "dataSchema": { + "dataSource": "%%DATASOURCE%%", + "parser": { + "type": "hadoopyString", + "parseSpec": { + "format" : "json", + 
"timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + } + } + }, + "metricsSpec": [ + { + "type": "count", + "name": "count" + }, + { + "type": "doubleSum", + "name": "added", + "fieldName": "added" + }, + { + "type": "doubleSum", + "name": "deleted", + "fieldName": "deleted" + }, + { + "type": "doubleSum", + "name": "delta", + "fieldName": "delta" + }, + { + "name": "thetaSketch", + "type": "thetaSketch", + "fieldName": "user" + }, + { + "name": "quantilesDoublesSketch", + "type": "quantilesDoublesSketch", + "fieldName": "delta" + }, + { + "name": "HLLSketchBuild", + "type": "HLLSketchBuild", + "fieldName": "user" + } + ], + "granularitySpec": { + "segmentGranularity": "DAY", + "queryGranularity": "second", + "intervals" : [ "2013-08-31/2013-09-02" ] + } + }, + "ioConfig": { + "type": "hadoop", + "inputSpec": { + "type": "static", + "paths": "%%INPUT_PATHS%%" + } + }, + "tuningConfig": { + "type": "hadoop", + "partitionsSpec": { + "assumeGrouped": true, + "targetPartitionSize": 75000, + "type": "hashed" + }, + "jobProperties": { + "fs.permissions.umask-mode": "022", + "fs.default.name" : "hdfs://druid-it-hadoop:9000", + "fs.defaultFS" : "hdfs://druid-it-hadoop:9000", + "dfs.datanode.address" : "druid-it-hadoop", + "dfs.client.use.datanode.hostname" : "true", + "dfs.datanode.use.datanode.hostname" : "true", + "yarn.resourcemanager.hostname" : "druid-it-hadoop", + "yarn.nodemanager.vmem-check-enabled" : "false", + "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -D", + "mapreduce.job.user.classpath.first" : "true", + "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", + "mapreduce.map.memory.mb" : 1024, + "mapreduce.reduce.memory.mb" : 1024, + "fs.gs.auth.service.account.json.keyfile":"%%GCS_KEYFILE_PATH%%", + "fs.gs.working.dir":"/", + "fs.gs.path.encoding":"uri-path", + "fs.gs.impl": "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem", + "fs.AbstractFileSystem.gs.impl": "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS", + "fs.gs.auth.service.account.enable": "true", + "mapred.child.env":"GOOGLE_APPLICATION_CREDENTIALS=%%GCS_KEYFILE_PATH%%", + "fs.gs.reported.permissions":"777" + }, + "rowFlushBoundary": 10000 + } + } +} \ No newline at end of file diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json new file mode 100644 index 00000000000..d30214b9314 --- /dev/null +++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json @@ -0,0 +1,101 @@ +{ + "type": "index_hadoop", + "spec": { + "dataSchema": { + "dataSource": "%%DATASOURCE%%", + "parser": { + "type": "hadoopyString", + "parseSpec": { + "format" : "json", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + } + } + }, + "metricsSpec": [ + { + "type": "count", + "name": "count" + }, + { + "type": "doubleSum", + "name": "added", + "fieldName": "added" + }, + { + "type": "doubleSum", + "name": "deleted", + "fieldName": "deleted" + }, + { + "type": "doubleSum", 
+ "name": "delta", + "fieldName": "delta" + }, + { + "name": "thetaSketch", + "type": "thetaSketch", + "fieldName": "user" + }, + { + "name": "quantilesDoublesSketch", + "type": "quantilesDoublesSketch", + "fieldName": "delta" + }, + { + "name": "HLLSketchBuild", + "type": "HLLSketchBuild", + "fieldName": "user" + } + ], + "granularitySpec": { + "segmentGranularity": "DAY", + "queryGranularity": "second", + "intervals" : [ "2013-08-31/2013-09-02" ] + } + }, + "ioConfig": { + "type": "hadoop", + "inputSpec": { + "type": "static", + "paths": "%%INPUT_PATHS%%" + } + }, + "tuningConfig": { + "type": "hadoop", + "partitionsSpec": %%PARTITIONS_SPEC%%, + "jobProperties": { + "fs.permissions.umask-mode": "022", + "fs.default.name" : "hdfs://druid-it-hadoop:9000", + "fs.defaultFS" : "hdfs://druid-it-hadoop:9000", + "dfs.datanode.address" : "druid-it-hadoop", + "dfs.client.use.datanode.hostname" : "true", + "dfs.datanode.use.datanode.hostname" : "true", + "yarn.resourcemanager.hostname" : "druid-it-hadoop", + "yarn.nodemanager.vmem-check-enabled" : "false", + "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", + "mapreduce.job.user.classpath.first" : "true", + "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", + "mapreduce.map.memory.mb" : 1024, + "mapreduce.reduce.memory.mb" : 1024 + }, + "rowFlushBoundary": 10000 + } + } +} \ No newline at end of file diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json new file mode 100644 index 00000000000..cf44540a6b4 --- /dev/null +++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json @@ -0,0 +1,77 @@ +{ + "type": "index_hadoop", + "spec": { + "dataSchema": { + "dataSource": "%%REINDEX_DATASOURCE%%", + "parser": { + "type": "hadoopyString", + "parseSpec": { + "format" : "json", + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec": { + "dimensionExclusions" : ["robot", "continent"] + } + } + }, + "metricsSpec": [ + { + "type": "doubleSum", + "name": "added", + "fieldName": "added" + }, + { + "type": "doubleSum", + "name": "deleted", + "fieldName": "deleted" + }, + { + "type": "doubleSum", + "name": "delta", + "fieldName": "delta" + } + ], + "granularitySpec": { + "segmentGranularity": "DAY", + "queryGranularity": "second", + "intervals" : [ "2013-08-31/2013-09-01" ] + } + }, + "ioConfig": { + "type": "hadoop", + "inputSpec": { + "type": "dataSource", + "ingestionSpec": { + "dataSource": "%%DATASOURCE%%", + "intervals": ["2013-08-31/2013-09-01"] + } + } + }, + "tuningConfig": { + "type": "hadoop", + "partitionsSpec": { + "assumeGrouped": true, + "targetPartitionSize": 75000, + "type": "hashed" + }, + "jobProperties": { + "fs.permissions.umask-mode": "022", + "fs.default.name" : "hdfs://druid-it-hadoop:9000", + "fs.defaultFS" : "hdfs://druid-it-hadoop:9000", + "dfs.datanode.address" : "druid-it-hadoop", + "dfs.client.use.datanode.hostname" : "true", + "dfs.datanode.use.datanode.hostname" : "true", + "yarn.resourcemanager.hostname" : "druid-it-hadoop", + "yarn.nodemanager.vmem-check-enabled" : "false", + "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", + "mapreduce.job.user.classpath.first" : "true", + "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", + "mapreduce.map.memory.mb" : 1024, + "mapreduce.reduce.memory.mb" : 1024 + }, + "rowFlushBoundary": 10000 + } + } +} \ No newline at end 
of file diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task.json new file mode 100644 index 00000000000..d18b6da1e4e --- /dev/null +++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task.json @@ -0,0 +1,114 @@ +{ + "type": "index_hadoop", + "spec": { + "dataSchema": { + "dataSource": "%%DATASOURCE%%", + "parser": { + "type": "hadoopyString", + "parseSpec": { + "format" : "json", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + } + } + }, + "metricsSpec": [ + { + "type": "count", + "name": "count" + }, + { + "type": "doubleSum", + "name": "added", + "fieldName": "added" + }, + { + "type": "doubleSum", + "name": "deleted", + "fieldName": "deleted" + }, + { + "type": "doubleSum", + "name": "delta", + "fieldName": "delta" + }, + { + "name": "thetaSketch", + "type": "thetaSketch", + "fieldName": "user" + }, + { + "name": "quantilesDoublesSketch", + "type": "quantilesDoublesSketch", + "fieldName": "delta" + }, + { + "name": "HLLSketchBuild", + "type": "HLLSketchBuild", + "fieldName": "user" + } + ], + "granularitySpec": { + "segmentGranularity": "DAY", + "queryGranularity": "second", + "intervals" : [ "2013-08-31/2013-09-02" ] + } + }, + "ioConfig": { + "type": "hadoop", + "inputSpec": { + "type": "static", + "paths": "%%INPUT_PATHS%%" + } + }, + "tuningConfig": { + "type": "hadoop", + "partitionsSpec": { + "assumeGrouped": true, + "targetPartitionSize": 75000, + "type": "hashed" + }, + "jobProperties": { + "fs.permissions.umask-mode": "022", + "fs.default.name" : "hdfs://druid-it-hadoop:9000", + "fs.defaultFS" : "hdfs://druid-it-hadoop:9000", + "dfs.datanode.address" : "druid-it-hadoop", + "dfs.client.use.datanode.hostname" : "true", + "dfs.datanode.use.datanode.hostname" : "true", + "yarn.resourcemanager.hostname" : "druid-it-hadoop", + "yarn.nodemanager.vmem-check-enabled" : "false", + "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Daws.region=%%AWS_REGION%%", + "mapreduce.job.user.classpath.first" : "true", + "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Daws.region=%%AWS_REGION%%", + "mapreduce.map.memory.mb" : 1024, + "mapreduce.reduce.memory.mb" : 1024, + "fs.s3.awsAccessKeyId" : "%%AWS_ACCESS_KEY%%", + "fs.s3.awsSecretAccessKey" : "%%AWS_SECRET_KEY%%", + "fs.s3.impl" : "org.apache.hadoop.fs.s3native.NativeS3FileSystem", + "fs.s3n.awsAccessKeyId" : "%%AWS_ACCESS_KEY%%", + "fs.s3n.awsSecretAccessKey" : "%%AWS_SECRET_KEY%%", + "fs.s3n.impl" : "org.apache.hadoop.fs.s3native.NativeS3FileSystem", + "fs.s3a.access.key" : "%%AWS_ACCESS_KEY%%", + "fs.s3a.secret.key" : "%%AWS_SECRET_KEY%%", + "fs.s3a.impl" : "org.apache.hadoop.fs.s3a.S3AFileSystem" + }, + "rowFlushBoundary": 10000 + } + } +} \ No newline at end of file diff --git a/integration-tests/src/test/resources/testng.xml b/integration-tests/src/test/resources/testng.xml index 88c64158978..b27549bc2d7 100644 --- a/integration-tests/src/test/resources/testng.xml +++ b/integration-tests/src/test/resources/testng.xml @@ -28,7 +28,6 @@ -
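
Each of the new cloud-input test javadocs asks you to copy the three wikipedia_index_data*.json files from integration-tests/src/test/resources/data/batch_index/json to the bucket and path you configure. A minimal sketch of that copy step is below; it assumes the standard aws, gsutil, and az CLIs are installed and authenticated, and <BUCKET>, <PATH>, and <ACCOUNT> are placeholders for the values you pass via druid.test.config.cloudBucket / druid.test.config.cloudPath (and your Azure storage account).

    cd integration-tests/src/test/resources/data/batch_index/json
    # S3-input tests
    aws s3 cp wikipedia_index_data1.json s3://<BUCKET>/<PATH>/
    # GCS-input tests
    gsutil cp wikipedia_index_data1.json gs://<BUCKET>/<PATH>/
    # Azure-input tests
    az storage blob upload --account-name <ACCOUNT> --container-name <BUCKET> \
      --file wikipedia_index_data1.json --name <PATH>/wikipedia_index_data1.json
    # Repeat for wikipedia_index_data2.json and wikipedia_index_data3.json.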
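
The javadocs list the required system properties individually but never show a full command line. The sketch below combines them for the S3-input-to-S3-deep-storage test; the integration-tests Maven profile, the -Dgroups flag, and the hadoop-s3-to-s3-deep-storage group string are assumptions to verify against integration-tests/README.md and TestNGGroup.java, and the angle-bracket values are placeholders.

    mvn verify -P integration-tests \
      -Dgroups=hadoop-s3-to-s3-deep-storage \
      -Dstart.hadoop.docker=true \
      -Dextra.datasource.name.suffix='' \
      -Doverride.config.path=<OVERRIDE_ENV_FILE> \
      -Ddruid.test.config.cloudBucket=<BUCKET> \
      -Ddruid.test.config.cloudPath=<PATH> \
      -Ddruid.test.config.cloudRegion=<REGION> \
      -Ddruid.s3.accessKey=<ACCESS_KEY> \
      -Ddruid.s3.secretKey=<SECRET_KEY>

Here <OVERRIDE_ENV_FILE> would be a copy of integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 with your values filled in. The same pattern applies to the other new Hadoop groups; swap in the group name from TestNGGroup and the properties listed in the corresponding class javadoc.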